MiniGPT-4/eval_data/aokvqa/annotations/aokvqa_v1p0_test.json
2023-10-23 19:34:34 +02:00

1 line
2.0 MiB

[{"split": "test", "image_id": 487715, "question_id": "22dfoxvWKwTS6myafdKHfc", "question": "Who married a woman that has a similar with the sign next to the chocolate avec sign?", "choices": ["dom perignon", "cesare borgia", "mick jagger", "ice-t"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487715.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211663, "question_id": "234yKjUiFq5MijJ4QXLFZm", "question": "Why is he bent over?", "choices": ["slipping", "controlling board", "falling", "tired"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130190, "question_id": "23A3Z2fPHC3urCjdYZ6rqP", "question": "Whose first name is the same as the name on the skis?", "choices": ["jerry rice", "norm macdonald", "jim carrey", "scott glenn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130190.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224877, "question_id": "23AH3HEVXtqNELngMhrV9n", "question": "What did this lad likely injure here?", "choices": ["knee", "elbow", "rear", "board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224877.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343693, "question_id": "23RDeoHuFcmeLpw8oKQLiA", "question": "What does it look like the girl is holding?", "choices": ["baby", "cat", "egg", "tin foil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343693.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249903, "question_id": "23Tv9JH6JccH6qZALHt39X", "question": "People that come here want to see what kind of match?", "choices": ["baseball", "basketball", "golf", "tennis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000249903.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 322663, "question_id": "23fbZij9XW6CWpVR2fBSJk", "question": "Based on the face what is the giraffe doing?", "choices": ["chewing", "ailing", "mating", "drinking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000322663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493466, "question_id": "24CRnoXgRhrmEN2Scpjk93", "question": "What would be most likely to hurt the man?", "choices": ["hooves", "teeth", "horns", "head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 497910, "question_id": "24JpeN3KbdyW6RHzyzD28S", "question": "Where might the elephants have been recently?", "choices": ["lake", "pond", "watering hole", "ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000497910.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215919, "question_id": "25WNkKZWWnYKCdetRbsJRM", "question": "What kind of fence is in front of the trees?", "choices": ["steel", "concrete", "wooden", "stone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000215919.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157922, "question_id": "25yDHCyLeGZnLhK2c2CW22", "question": "What is this group of animals called?", "choices": ["herd", "flock", "community", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372449, "question_id": "289gdniVRHj3QAaLe27u6o", "question": "What are the cups made from?", "choices": ["plastic", "paper", "styrofoam", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372449.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201315, "question_id": "28kuweQ2NEgM7wPMPYrttT", "question": "What action is the man taking?", "choices": ["hiding", "shaking", "descending", "ascending"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 492345, "question_id": "297jgu3BMYgkRYz9nseD86", "question": "What is needed for this activity?", "choices": ["rain", "waves", "sand", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000492345.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347103, "question_id": "29NjNaWgYDHm95PtenXaJt", "question": "What does the building resemble most?", "choices": ["restaurant", "mall", "castle", "arcade"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347103.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561197, "question_id": "29envRrCv7U2RHoafZD24n", "question": "What activity is the flying animal engaged in?", "choices": ["mate call", "drinking", "eating", "procreating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561197.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542648, "question_id": "29fdevfUZTBBcYDHbq6ZnJ", "question": "What is on the left side of the sign?", "choices": ["fire hydrant", "lamppost", "car", "garbage can"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302272, "question_id": "2A2nv28RnC3cU2Lw6Axhz7", "question": "These taxidermied bears are located in what type of location?", "choices": ["store", "museum exhibit", "residence", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302272.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226222, "question_id": "2AzKxD6QQuDQoSTZvuPSAa", "question": "What are the people shown here involved in presently?", "choices": ["waving goodbye", "departing", "boarding", "deplaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226222.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102868, "question_id": "2B8pDPBoEEancB9KppTidN", "question": "What might this bird eat?", "choices": ["seafood", "steak", "inland seeds", "tree bark"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102868.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483966, "question_id": "2D23EXSBgKG6kqNe8oMQko", "question": "What feature does this animal have?", "choices": ["trunk", "pouch", "whiskers", "wings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483966.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192207, "question_id": "2D3fVbRd88wNWvzBa64GPh", "question": "What type of devices are being used?", "choices": ["cooking", "electronic", "medical", "manual"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192207.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 480510, "question_id": "2EC3JFeHTySuTkDNoFRhtb", "question": "What piece of furniture would be useful here?", "choices": ["porch swing", "bookshelf", "dining table", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000480510.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303474, "question_id": "2ENKDr7RFmeCmSBGddk4nN", "question": "What branch of the armed forces does this vehicle belong to?", "choices": ["army", "air force", "navy", "marines"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 383170, "question_id": "2EXfo6dCUxzhbNsYViKq4P", "question": "Why does the cow wear a bell?", "choices": ["frightens others", "entertainment", "for sale", "find easier"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000383170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 331838, "question_id": "2Gh533W6dRsDYbomts256k", "question": "What is the righthand animal doing?", "choices": ["sniffing", "eating", "walking", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000331838.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299627, "question_id": "2J2eP7rFoa4PHxtNFADbgr", "question": "Why is cover on the grill?", "choices": ["weather protection", "hide food", "style", "keep warm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163961, "question_id": "2JdnKjzqdkrbFxgA9h255w", "question": "What disease do most people believe they can get if they are cut by the item in the foreground?", "choices": ["polio", "diabetes", "tetanus", "smallpox"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163961.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299627, "question_id": "2Joor8PyGg3KQNg3pbQBvZ", "question": "What food would be hard to prepare with the black item?", "choices": ["hot dog", "egg", "steak", "chicken"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000299627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108632, "question_id": "2LbFSqG4j8HSFpXkFbHmq4", "question": "What would this container be used for?", "choices": ["travel", "food", "groceries", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108632.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329210, "question_id": "2LhEL4DAvS78gnAcXCPdcm", "question": "What is the large structure here?", "choices": ["airplane", "basket", "submarine", "tank"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329210.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5990, "question_id": "2LtnACy9J5febZfZM4STbd", "question": "What topping on this pizza is considered unusual when served on pizza?", "choices": ["tomato sauce", "cheese", "eggs", "tomato"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005990.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47371, "question_id": "2M9CoxxSsBkAfkojrx3AXM", "question": "The markings on its body are referred to as what?", "choices": ["tattoos", "speckles", "stripes", "moles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000047371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212915, "question_id": "2ManWZpinnrZVqtZrCKVvn", "question": "In what year was this album released?", "choices": ["2006", "2015", "1999", "2010"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000212915.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501662, "question_id": "2Munn48n3dQ6otbqB2itjA", "question": "What month is on the picture?", "choices": ["february", "july", "january", "may"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501662.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372818, "question_id": "2NPzFsgwn9mV2vknDcJDbb", "question": "What should be changed to make the room look neater?", "choices": ["paint wall", "fold quilt", "vacuum floor", "empty garbage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372818.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87129, "question_id": "2Nat7iJs7SzF3sC7E8bQiz", "question": "Which country is famous for floating umbrella?", "choices": ["portugal", "nepal", "italy", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087129.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35387, "question_id": "2PNToLumM4Q6NnRpGT8Q9G", "question": "What is the correct PSI for a bicycle tire?", "choices": ["100-120psi", "80-130psi", "40-90psi", "50-80psi"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000035387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2815, "question_id": "2Pgu3PEGyFUrktsXkcUhBx", "question": "What would someone do on the white area near the lamp?", "choices": ["shower", "exercise", "eat", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002815.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534575, "question_id": "2RXvzYma9JXNQkwsmw6Q43", "question": "What is the thing that is out of focus called?", "choices": ["dock", "plane", "bird", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534575.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558952, "question_id": "2RkWFzDvq99eLyDZiofPqy", "question": "Which item would be most difficult to carry?", "choices": ["4th", "1st", "3rd", "2nd"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468051, "question_id": "2S85UwBX9XV77NxiJrXXwi", "question": "What is the stuffed animal on the left wearing?", "choices": ["hat", "tracksuit", "bow", "pumpkin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529563, "question_id": "2VDkndEXa78Lse3RdvFinn", "question": "From what are these animals most likely to run?", "choices": ["mice", "giraffes", "donkeys", "lions"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529563.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456862, "question_id": "2Vm6peXM4ob4etsf9q8wcf", "question": "Why is he wearing a suit?", "choices": ["uniform", "costume", "warmth", "business"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456862.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25564, "question_id": "2Vmd5BVEjmwZj6XFASGVu6", "question": "The coffee drinker here likes to add what to their coffee?", "choices": ["cream", "ice", "water", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025564.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464082, "question_id": "2WQwmpQUXg7gsE2tDaQtKV", "question": "What brand is the computer?", "choices": ["lenovo", "dell", "apple", "macintosh"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464082.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481559, "question_id": "2WaAYcGZ2KHiZVjGtKRhdY", "question": "What kind of fish this brown bear eats?", "choices": ["mackerel", "salmon", "cod", "herring"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481559.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376149, "question_id": "2YHjZfAdS5D6NjPeJE9FbD", "question": "Why has she covered her head?", "choices": ["fashion", "protection", "uniform", "disguise"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376149.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 370223, "question_id": "2Yuh6pbCoeheZ6aLrjpJuc", "question": "How many aero planes do you see?", "choices": ["two", "none", "one", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000370223.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459908, "question_id": "2ZtB367wfjVKuTCaPZmo2j", "question": "Why might they be near the trees?", "choices": ["sleep", "eat", "play", "drink"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000459908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79799, "question_id": "2aJS5D3BU3G5YfAKv3ZBzV", "question": "Who is famous for playing this sport?", "choices": ["jeff blauser", "frederick barbarossa", "dev patel", "bam margera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079799.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498642, "question_id": "2acKtXnMJia8ECsbCJcfQS", "question": "What is the fork sitting in?", "choices": ["pea salad", "potato salad", "egg salad", "fruit salad"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498642.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570918, "question_id": "2ap6Y5pPDYX5inBuWZUxGw", "question": "What formation is in the body of water?", "choices": ["sand dune", "kelp", "rocks", "cactus or"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570918.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6044, "question_id": "2b7U4t3BKLDoT7uWz93yny", "question": "Hat is in the back of the truck?", "choices": ["mattress", "tire", "box", "cannon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 150047, "question_id": "2bcv6DJEdFprtnzqnEg76G", "question": "The apparatus on his face was meant to protect against what?", "choices": ["gas", "bugs", "birds", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000150047.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98759, "question_id": "2dTin6biVNmpb4Hm8mmQzN", "question": "What is the person cutting?", "choices": ["bread", "nails", "name tags", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098759.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458063, "question_id": "2ddhX7GzuX3g4kDVBMiyBT", "question": "What decade was the phone on the made in?", "choices": ["1980's", "2000's", "2020's", "1960's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577069, "question_id": "2dsSpt7MU63N2FgriXsqTo", "question": "What type of sound does the following birds produce?", "choices": ["caws", "hisses", "crows", "sings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418689, "question_id": "2eCWwucG7zxrFTTLw25jYG", "question": "This airline is the flag carrier for which area?", "choices": ["lima", "mumbai", "hong kong", "kingston"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418689.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364163, "question_id": "2fD9xwFJ6jJRzxf5KuSNLm", "question": "Why would the man use the object in his hand?", "choices": ["hiding", "camouflage", "rain protection", "fishing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000364163.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442851, "question_id": "2fiJRcdvmmsoXKRUTZgciY", "question": "The white utensil is made of what material?", "choices": ["plastic", "ceramic", "wood", "marble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442851.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546989, "question_id": "2g9VKEajzsHWFiTGsw4Ngm", "question": "What was this guy doing before he got here?", "choices": ["traveling", "movies", "jogging", "playing baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546989.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133801, "question_id": "2gJJga2oK3g5FDHkbYsHUa", "question": "Why is the door open?", "choices": ["for passengers", "for pictures", "for cleaning", "for luggage"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000133801.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516578, "question_id": "2gxhitzS9PLyiSwmy8osjn", "question": "Which one of the following would be trained how to use this?", "choices": ["detective", "babysitter", "professor", "firefighter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162766, "question_id": "2gxnWYUwxhcGbqdWsnYUQk", "question": "What is the man about to do?", "choices": ["hide", "eat", "dress", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 58427, "question_id": "2hSAixSkmELYcKW6eRfuFV", "question": "What dried plant does the larger shaker contain?", "choices": ["black pepper", "sugar", "red pepper", "salt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000058427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365818, "question_id": "2iDWJXsPDE5QyqbDhUcJZc", "question": "This toilet is meant to be installed where only?", "choices": ["front wall", "corner", "ladies room", "back wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365818.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464667, "question_id": "2jgwoXDApy8DVXqR5oXAYJ", "question": "The large kite here is meant to mimic an animal normally found in what?", "choices": ["land", "space craft", "sea", "sky"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464667.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222718, "question_id": "2kYThf6ExB4rMW8esYmqtN", "question": "On which type street do these signs give warning?", "choices": ["none", "side", "highway", "avenue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222718.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 106571, "question_id": "2mzCuBJ3ZqaBWKanTywHp2", "question": "What river is flowing in front of the clock tower?", "choices": ["rhine", "danube", "thames", "seine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000106571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214993, "question_id": "2oMMZUuXeupuraJfRtepcP", "question": "The lines between the squares is normally filled with what material?", "choices": ["clay", "grout", "sand", "marbles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38089, "question_id": "2odQJSNkaj5vkBoLH3iCJc", "question": "The bird here sees what that attracts his attention?", "choices": ["cheetos", "reflection", "eggs", "nest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038089.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511315, "question_id": "2q9g8ZbdUwiRyoJaa6hPsZ", "question": "What is this animal doing?", "choices": ["eating", "sleeping", "attacking", "climbing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182664, "question_id": "2qDTM9yP56gchmh87Q9hYT", "question": "What can be seen tucked under the corner of the keyboard?", "choices": ["paper", "hand", "air", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000182664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118711, "question_id": "2qMW2YFABBCS7fUEt9Gud3", "question": "What terrain is shown here?", "choices": ["valley", "savanna", "desert", "bay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204220, "question_id": "2r65o6uRizY8Mh9zgQw8R4", "question": "What is the person above holding?", "choices": ["watch", "nothing", "dog", "blower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 240491, "question_id": "2rgTTMPs2ff7kFQkWeTHVo", "question": "What skateboard move is the person performing?", "choices": ["900", "kickflip", "grind", "ollie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000240491.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463911, "question_id": "2s8RSb6YKdszh9x38AFtud", "question": "What type of store is this?", "choices": ["shoe", "book", "food", "record"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463911.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 233939, "question_id": "2sBrXpjDt8Y2Bx4pf3SP2M", "question": "Which giraffe is 10 feet tall?", "choices": ["both", "neither", "smaller", "taller"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000233939.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8500, "question_id": "2sVqRW2epZy8xA3MY6GWbV", "question": "What occupation does the man sitting hold?", "choices": ["linotype operator", "beef eater", "dog catcher", "shepherd"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008500.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162349, "question_id": "2uT3yFyGdEeo6VdhH8QDUB", "question": "What type of person visits this location?", "choices": ["elected official", "tourist", "business executive", "resident"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456738, "question_id": "2ucRKLwq3XaV24YpEQZZkm", "question": "What might be likely to cause this train to stop in this area?", "choices": ["sleet", "hot sun", "rain", "avalanche"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456738.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89035, "question_id": "2vKFZWExBVvkdEbVsBhJRn", "question": "Which country is this stop sign most likely in?", "choices": ["usa", "korea", "africa", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089035.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 185937, "question_id": "2vVYaweEbK5ehXk6qNaJG3", "question": "What would be most likely to light your way during noon on a day when there is a blackout here?", "choices": ["basement light", "skylights", "small candles", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000185937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373530, "question_id": "2vcw8KGmUhenWvsYysHfsg", "question": "What does the skateboarder's crouch suggest he's doing next?", "choices": ["trick", "spin", "flip", "slalom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349422, "question_id": "2vegP2rdV57S2vP9a24Pgr", "question": "To remove waste from the bowl here where should you touch?", "choices": ["tank side", "bowl", "tank top", "back wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349422.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 266225, "question_id": "2vh7xBYcyGC8Fq7eDPLghT", "question": "Where is this person seated?", "choices": ["sofa", "stool", "chair", "bench"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000266225.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296934, "question_id": "2wH9Qjj8fvVRGJtrhsQvZi", "question": "How is this food portioned?", "choices": ["slices", "glasses", "cubes", "scoops"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296934.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563004, "question_id": "2xPXVHCYP3iLHNcJkkLNg7", "question": "What is on the board?", "choices": ["pizza", "hamburger", "meat", "onion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563004.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532176, "question_id": "2xsKBTLHbWLyzxaDKbp29d", "question": "What will this man most likely do before going into public?", "choices": ["sing", "change ties", "tie tie", "tuck shirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532176.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577390, "question_id": "2yYzfhrDDTotX7BfXgc4cG", "question": "What is required for this vehicle to move?", "choices": ["grass", "trail", "track", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 331048, "question_id": "2yaEGcvAPdkhQ9xvMzoEeA", "question": "What is the item in the middle used for?", "choices": ["recording", "dinner", "cutting lawns", "scaring dogs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000331048.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463462, "question_id": "2zJCb8ByzQf5XXrzXurWbT", "question": "What type of seating is available?", "choices": ["sofa", "bench", "stool", "chair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463462.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328642, "question_id": "33iSsgqUxAFv43Vx3GKX9s", "question": "The instrument on the dashboard is measuring what?", "choices": ["temperature", "gas", "speed", "oil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328642.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9092, "question_id": "34BjVF9oGcnTo6Vkazgrqt", "question": "What is the bottom writing on the chalkboard a reference to?", "choices": ["family guy", "simpsons", "starwars", "mickey mouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009092.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 128933, "question_id": "34FQTaBL3NyS7tep8v6P2e", "question": "Which one of these pastimes is the computer owner sure to practice?", "choices": ["skiing", "snorkeling", "motorcycle riding", "skydiving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000128933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420301, "question_id": "353xMsoqmCRTXfoRMK47rz", "question": "Why is there a utensil in the bowl?", "choices": ["to mix", "to hide", "to store", "to clean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420301.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89784, "question_id": "35UV8ToGNQnYj7NiGWQ42Z", "question": "What is keeping the shoes on the boy?", "choices": ["laces", "skateboard", "pants", "arms"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352489, "question_id": "35kNxm86PnMUsGWMdYboRz", "question": "Who are the people in the vehicle?", "choices": ["government", "wildlife tourists", "police", "poachers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352489.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321180, "question_id": "36GN5TtMNnAtYgfNx7NYDy", "question": "What is the person standing on?", "choices": ["whale", "surf board", "water", "ground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321180.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334608, "question_id": "36KCKknSFP4wCPHuxrbSiG", "question": "What food group is he eating?", "choices": ["vegetable", "grains", "fruit", "dairy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387852, "question_id": "36VkkEGghhdz3YXCtcuK2L", "question": "What are the umbrellas near?", "choices": ["chairs", "snow", "babies", "cats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387852.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298533, "question_id": "38KzGLANrXWp5BfQW8E8D8", "question": "What is warming up this dish?", "choices": ["microwave", "toaster", "stove", "oven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183818, "question_id": "3AQMEJ9hM9C7g9MbQRqqqm", "question": "What species of animal shares the name for this item?", "choices": ["ovine", "canine", "rodent", "bovine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183818.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22502, "question_id": "3AnXb3vQ7s242TrG2bgQUb", "question": "If urinating only which button do you push?", "choices": ["both", "small one", "none", "large one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022502.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424036, "question_id": "3AvrcG6tpQPi9P74zY9G5v", "question": "Why is he seated at this table?", "choices": ["to sew", "to eat", "to paint", "to work"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457949, "question_id": "3BeS2uUkjCL9QSSrW6JN8G", "question": "What type of vehicles are being transported on the truck?", "choices": ["vans", "trucks", "cars", "motorcycles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457949.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 58220, "question_id": "3BmcwcRsidzWW4ZyndnpuD", "question": "What is usually moving behind this man?", "choices": ["elephants", "papers", "guns", "vehicles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000058220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458062, "question_id": "3BtE6EWnnJCuxNQuQTg6iu", "question": "What is this skiing stance called?", "choices": ["snowplow", "cross country", "freeriding", "carving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206769, "question_id": "3DKgNxNsRHXhpcDBE99sDm", "question": "The name of the fruit on the left is also a what?", "choices": ["dog", "color", "coin", "country"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 305286, "question_id": "3E9vpC9wYXevS9dJUWDpuZ", "question": "Which one would be the best for precision cutting?", "choices": ["red", "black", "blue", "silver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000305286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441510, "question_id": "3GDYctgzik7zpNq5cT7Xru", "question": "If this was the time in New York what time would it be in Brazil?", "choices": ["319", "445", "819", "119"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441510.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192396, "question_id": "3GyH4eRKNTvZ4frZJS4TnJ", "question": "Where does this type of activity usually take place?", "choices": ["omsk", "oahu", "liberia", "cairo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407691, "question_id": "3H4Z3WBngxXWARnCynMZyB", "question": "What would the wrench shown here do?", "choices": ["add ornamentation", "beat insects", "open hydrant", "nothing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000407691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363095, "question_id": "3HgYKwWKaqdmZu6W77f6o8", "question": "Why are the bears arrange thus?", "choices": ["for display", "random", "for sale", "attract attention"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363095.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288034, "question_id": "3LYq98onfecDjPrS9bchEB", "question": "What is the large white appliance used for?", "choices": ["cooking", "calling", "watching", "cooling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288034.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55480, "question_id": "3MNXZqAQYmVptUSyKBYRdv", "question": "Where is the person taking the photo of the plane probably located?", "choices": ["another plane", "ground", "onboard", "space-station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055480.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199530, "question_id": "3MZ2CK86Wt7zPdo83Yxyks", "question": "What is the nickname of the state this train services?", "choices": ["ocean state", "sunshine state", "bay state", "golden state"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517730, "question_id": "3Mb4MQztRrdy7AYBJMdh39", "question": "What could he keep on if he wanted to go swimming?", "choices": ["shoes", "hat", "shirt", "shorts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517730.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 23856, "question_id": "3Mpwo77o2oHBKbBVJqfcQk", "question": "What kind of clef is depicted here?", "choices": ["treble", "bass", "tenor", "alto"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000023856.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494871, "question_id": "3NTk7uiKobe8AxmDRXXNYE", "question": "What time of year is it?", "choices": ["winter", "fall", "summer", "spring"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283460, "question_id": "3NpLaLT4zprN23szEXs58d", "question": "Where is the main base for the airplane most likely located?", "choices": ["ireland", "poland", "england", "usa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52716, "question_id": "3S4XNJmYGErLXpz3sEc3dP", "question": "Which task would opposable thumbs help the cat accomplish?", "choices": ["tying shoes", "running", "eating", "playing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052716.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252763, "question_id": "3SXpvBTy4agRTtyMetQoyo", "question": "What part of the terrain could do the worst damage to the man's skies?", "choices": ["snow", "sleet", "ice", "rocks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252763.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55426, "question_id": "3SmVZDpFKRvbQRnYck2WnX", "question": "In what form was the original version of this product?", "choices": ["leaf", "stick", "paper", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055426.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250507, "question_id": "3T84swqEUaDxJz3YB4BUvU", "question": "What is the animal doing?", "choices": ["grooming", "flying", "devouring", "hunting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000250507.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89985, "question_id": "3TA8XQdsMo5TVBTbnUrmV3", "question": "What is a common topping for the item on the plate?", "choices": ["oranges", "mustard", "sausage", "cherry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089985.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357085, "question_id": "3TYhpqiHdF2GnCYR5pDp96", "question": "Where might this animals be found that has the same first letter as their name?", "choices": ["aquarium", "safari", "zoo", "petshop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573768, "question_id": "3Tkb57LsdYsc9zoA7hZPoa", "question": "What is the chance the umbrella would be destroyed if the tide comes in?", "choices": ["low", "high", "medium", "no chance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000573768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432986, "question_id": "3TkvT6k76PL3gQULc9C4Jr", "question": "What will this person likely do next?", "choices": ["fish", "parasail", "surf", "fly kite"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000432986.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 370508, "question_id": "3Tuw25LbqrfHiP34Bbkz7b", "question": "Which direction is this surfer headed?", "choices": ["right", "left", "down", "up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000370508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572677, "question_id": "3U798aTY3s6G2EwFb4AUXb", "question": "The pans on and near the stove top can be used to create what?", "choices": ["muffins", "wedding cakes", "bran cereal", "jello"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572677.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513104, "question_id": "3UBPe7QjQ7NTCHMAZjyYQh", "question": "The person here uses their board keeping only what bodyparts on it?", "choices": ["right foot", "hands/ chest", "rear", "left foot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521329, "question_id": "3UbdRNcYcwWwn4c4XiN9XW", "question": "The dog is closest to what item?", "choices": ["computer", "phone", "cat", "wine glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92035, "question_id": "3VVkaRsWjbozpEmcrxgPKU", "question": "What is below the clock?", "choices": ["bench", "flag", "rope", "light"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092035.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579128, "question_id": "3XBbXeDpEYynF4WnhVgaWb", "question": "What advantage does this person gain by holding hands up?", "choices": ["balance", "uber ride", "nothing", "scare sharks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267483, "question_id": "3YZGfp7kqMjHCrQTUn2ajE", "question": "How much pizza is left to eat?", "choices": ["half", "all", "none", "two-thirds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267483.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 398348, "question_id": "3ZiPZ8HD2QhQy9X7zS9YDV", "question": "What type of transportation is shown?", "choices": ["air", "road", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000398348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99117, "question_id": "3aapfHjXmuAGe6omSamVM4", "question": "What are the paper towels for?", "choices": ["cleaning floor", "carrying food", "drying hands", "cleaning mirror"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099117.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513259, "question_id": "3afHnWoDWk8oSPKaUSPwZe", "question": "What would one expect the statue to do if it were a real human?", "choices": ["eat", "dance", "cry", "sew"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513259.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168177, "question_id": "3aiqsaDFPEgfQp8okgmz97", "question": "What type of tower is this middle building typically referred to as?", "choices": ["iron tower", "age tower", "time tower", "clock tower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7038, "question_id": "3bDK8jyLKQXVibkChRm2wj", "question": "What do the bindings over its eyes prevent?", "choices": ["mating", "sun glare", "running away", "looking sideways"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007038.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 18972, "question_id": "3dXy7T93b5R2HDd88Ni74n", "question": "What are they trying to build?", "choices": ["ramp", "cover", "fort", "bench"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000018972.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74890, "question_id": "3eTHFzA4ZSBxSCXETHhngg", "question": "What is the nickname that people call the mini skateboards?", "choices": ["small boards", "thumb boards", "finger boards", "plastic boards"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000074890.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 531712, "question_id": "3edJdRZjhJq76mhqcWcsU7", "question": "What can best be used to describe the person currently?", "choices": ["soaked", "sleeping", "hot", "dry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000531712.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319377, "question_id": "3ejvRjpAC5rSRpPAUsmQ2n", "question": "What most indicates that it is windy and ideal for kite flying?", "choices": ["rocks", "sand", "clouds", "waves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269787, "question_id": "3f8MA7kzjoYx2K5MdFq9cA", "question": "What type of kitchen is shown?", "choices": ["commercial", "residential", "truck", "hospital"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269787.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 213793, "question_id": "3fAeaYS5CZTrStRj7WGK5U", "question": "What pastime does the person sleeping here enjoy when resting?", "choices": ["tennis", "tv", "reading", "gaming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000213793.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190777, "question_id": "3fUoJatsJucgXKRgHN4Lob", "question": "What type of transportation is this?", "choices": ["land", "water", "rail", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454091, "question_id": "3fg3VKcCuze4dAVMGZPUFe", "question": "What is in the soil?", "choices": ["seedling", "cat", "dog", "plant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454091.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 58742, "question_id": "3gTSzBoKiqmwGj9rjMWTqN", "question": "If accessed what comes out of the red post like item here?", "choices": ["nothing", "gas", "water", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000058742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482519, "question_id": "3giTHA53Dh58f6iZYB6jCN", "question": "Where is the water falling from?", "choices": ["waves", "waterfall", "hose", "sky"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482519.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351691, "question_id": "3hCwP9GoMstMiiH2J8e6oP", "question": "Which one of these items would it be more appropriate to put in this case?", "choices": ["marbles", "pants", "rocks", "oysters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 1172, "question_id": "3hgsUQe76nVvFVVGV9thdE", "question": "What type of animal is shown?", "choices": ["reptile", "stuffed", "domestic", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000001172.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573220, "question_id": "3kjgqPr4xwLr4CaHHT6fwb", "question": "What is the most common type of hardwood flooring?", "choices": ["oak", "walnut", "mahogany", "ash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000573220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428631, "question_id": "3n7FbciBcMnK2hDDkVZeLW", "question": "How many legs does the animal have?", "choices": ["nine", "two", "three", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127333, "question_id": "3ogMMYG8Ngy7rPi4A4tbXa", "question": "What room of the house is unaccounted for?", "choices": ["entry way", "kitchen", "bedroom", "bathroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127333.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158496, "question_id": "3q5cNFijL8SwpT768Mcmg9", "question": "What are red and can be seen growing on the plants?", "choices": ["peppers", "berries", "onion", "apples"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158496.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486861, "question_id": "3q9urkaWkCX6LrkDPKUdYE", "question": "What surface is the giraffe in the foreground walking on?", "choices": ["dirt", "rocks", "grass", "pavement"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486861.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 383650, "question_id": "3qNSctQyJ3HH5MMVmdzAQq", "question": "What color is the stuff coming from the top of the vehicle?", "choices": ["black", "blue", "pink", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000383650.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342873, "question_id": "3qphfDWcf4Ea7hmjTGmS78", "question": "These items have been abandoned for what likely time frame?", "choices": ["years", "minutes", "months", "weeks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342873.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34638, "question_id": "3r4ewrQnkfag7qKij4PgjD", "question": "What word best describes this animal?", "choices": ["burly", "miniature", "two-legged", "petite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000034638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410811, "question_id": "3r8FJJBckpYoBuGS8Xq7SR", "question": "This map declares about what?", "choices": ["passenger route", "city map", "train route", "platform route"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410811.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188714, "question_id": "3tZ3Wu2vHbyJX6CL35LDU4", "question": "What is the larger bear doing?", "choices": ["making selfie", "eating", "singing", "hibernating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495230, "question_id": "3ttsEetSMobXjq9FgaX3Qm", "question": "What does this device do to the oranges?", "choices": ["slices", "juices", "peels", "cuts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339300, "question_id": "3uetX3867rhiKUivbTJtT2", "question": "What dog has the highest IQ?", "choices": ["labrador", "papillon", "golden retriever", "poodle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17185, "question_id": "3uqiLUzKdx2JJuoiXvW2md", "question": "What kind of clothing is on the animal?", "choices": ["bathing suit", "vest", "jacket", "sweater"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017185.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22573, "question_id": "3wqxEMG5qyTojdxxGCDaqY", "question": "From which plant part does the most orange item shown here come?", "choices": ["none", "fruit", "banana tree", "root"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360492, "question_id": "3xLKojKRSnwjBRQGdPkSQS", "question": "What does this dog most want to shake now?", "choices": ["hands", "tail", "head", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000360492.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419654, "question_id": "3yKkLxuTdRbREviQwyEN5H", "question": "How many towels are on the rack?", "choices": ["three", "five", "two", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000419654.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301833, "question_id": "3yVHpGJr5ZsemY8ukW6h9J", "question": "What is the most powerful piece in the game depicted on his tie?", "choices": ["king", "bishop", "rook", "queen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301833.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369051, "question_id": "3ybuUV9ALPPrdQoGgDayT3", "question": "On what peninsula does this road lie?", "choices": ["lizard", "dingle", "yorke", "kenai"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543591, "question_id": "3yoFut6yPwKG4PntYxuisY", "question": "What is on top of the box?", "choices": ["flower", "knife", "bee", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543591.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309691, "question_id": "3zDpfKxnt8o8d4LwPNveL3", "question": "If threatened which body part are these animals best defense?", "choices": ["dewlaps", "tails", "horns", "tongues"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230958, "question_id": "3zQkxPcfyVAckJLRuD87Rp", "question": "What was the last person to sit on the bench doing?", "choices": ["eating", "sleeping", "studying", "exercising"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230958.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209735, "question_id": "3zeQ2C4kSRTTBfiaN4Cs7b", "question": "What feature of the horses are similar color?", "choices": ["nose", "legs", "mane", "head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563956, "question_id": "3zypor2vN46uhD76vgjiDm", "question": "What type of water is the bird in?", "choices": ["river", "pond", "lake", "ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563956.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326888, "question_id": "42dAaRoJaFphLCj9kGyFCN", "question": "What is the job of this horse?", "choices": ["carry", "push", "show", "haul"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000326888.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91897, "question_id": "42tSSBfHieZc9sckdaASLi", "question": "What kind of person stays here?", "choices": ["acrobat", "artist", "child", "midget"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091897.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391257, "question_id": "43mBxfEipqm9CVtfkCEtbV", "question": "What activity are these zebras doing right now?", "choices": ["mounting", "sleeping", "galloping", "evading predators"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 580970, "question_id": "45R56XPbc7jpwDvPEpd5eA", "question": "The clock number are in?", "choices": ["latin", "roman", "code", "english"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000580970.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557331, "question_id": "46W58SGnXvzcSBTCQSTkb7", "question": "The person is doing a sport in what environment?", "choices": ["snow", "desert", "jungle", "lake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557331.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404690, "question_id": "46W9fHNyLQpNjTffVV6KzK", "question": "What type of salad is this?", "choices": ["caesar", "caprese", "egg", "potato"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404690.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3896, "question_id": "479LLWc5XAqjnJJtG4NYEc", "question": "What are the trees lacking that they would often have?", "choices": ["branches", "growth", "leaves", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003896.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314748, "question_id": "4ADbxBnKfvufX7gCMsMTEX", "question": "What is seen flying in the air?", "choices": ["kite", "bird", "plane", "helicopter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314748.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 240958, "question_id": "4Cp7XYfoPPTjbefH9JV2cA", "question": "The animals here are in a sort of what?", "choices": ["zoo", "beef farm", "private lawn", "meat factory"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000240958.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 148567, "question_id": "4DFcDPgoENU4ikqBXNH7f5", "question": "What is needed for this activity?", "choices": ["wind", "sand", "snow", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000148567.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308056, "question_id": "4DWRrB6ckmbMoEWWAhmxMA", "question": "Which of this surfers legs has something tied to it?", "choices": ["left", "right", "none", "both"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442884, "question_id": "4DywHduPLeCRgNyaBXaKHA", "question": "One must have lots of which one of the following if one wants to own one of these animals?", "choices": ["acres", "computers", "degrees", "children"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442884.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197822, "question_id": "4E9NmzqXxWBGnBg2T5R697", "question": "In what year was this airport decommissioned?", "choices": ["2008", "2021", "1990", "1995"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197822.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462119, "question_id": "4FkqdyyPT5gULwKF5ekugs", "question": "How is this fixture powered?", "choices": ["gas", "sun", "coal", "electric"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462119.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262356, "question_id": "4Fnd5UN4Eg2ziUiv2aS5xE", "question": "What did she just do?", "choices": ["tossed frisbee", "woke up", "dropped skis", "ate lunch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262356.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 208090, "question_id": "4FsRjjWSqDDVndEywFwsD5", "question": "Which seasonal Olympic game it is?", "choices": ["autumn", "spring", "summer", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000208090.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84374, "question_id": "4HBjivSedodot6fFz6drgZ", "question": "When facing this street who is allowed to continue forward at this exact moment?", "choices": ["taxis", "cars", "nobody", "pedestrians"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084374.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289205, "question_id": "4HqLbCNa9FKZHTxJFYP4J3", "question": "What is the bright green area on the train tracks?", "choices": ["paint", "moss", "foam", "jelly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289205.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3853, "question_id": "4J53XXdTyyT9HyTVSknpZZ", "question": "What reason is likely the reason you may not travel down West Lake Street?", "choices": ["mardi gras", "road work", "cash giveaway", "rush hour"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363535, "question_id": "4JrDdwsqcSKUsYqUT5v7fj", "question": "What symptom does the person who just used the toilet have?", "choices": ["vomiting", "diarrhea", "dizziness", "headache"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395389, "question_id": "4K6L4GHLmiEcxYgsCHgdtm", "question": "What would this vehicle be used for transporting?", "choices": ["dogs", "married couple", "three clowns", "baseball team"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395389.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 480378, "question_id": "4LQfSX9kRrEjSFhbXJmgUh", "question": "What kind of car is compatible with this pump?", "choices": ["tesla", "ford", "gmc", "chevrolet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000480378.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537030, "question_id": "4MBQKjmYAasZrfk7TwvWd9", "question": "What keeps the kite from flying away?", "choices": ["string", "chain", "fiber optics", "bungees"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537030.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301354, "question_id": "4NdJYV8YLdpXexxsRTB9B2", "question": "The person is doing what?", "choices": ["eating", "falling", "sleeping", "flying"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518963, "question_id": "4Nqcijo53zb8MFofY6V9DA", "question": "Why is he wearing glasses?", "choices": ["sunshine", "to read", "uniform", "costume"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316222, "question_id": "4PLXn4qnBq78JQEjHBgRxQ", "question": "What is this mini fridge mainly being used for?", "choices": ["produce", "meat", "beer", "frozen food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316222.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507538, "question_id": "4PSdCW2wMKg8UEjwgJfrD5", "question": "What is the person who owns this dog wearing?", "choices": ["nothing", "jeans", "stockings", "tutu"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462586, "question_id": "4Pb7cyGNFy8Y76HqNGx4D6", "question": "This man look like he is most likely to be what?", "choices": ["scientist", "ufc champion", "weightlifter", "wwe wrestler"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000462586.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546898, "question_id": "4PgKMiajBFx2f5WXkinWXB", "question": "What is the air temperature surrounding this roadway?", "choices": ["cold", "freezing", "warm", "hot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546898.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19305, "question_id": "4Q7r6PwTViBDTNteQXzcpe", "question": "When it lands which part of the skateboard here will touch the ground first?", "choices": ["wheels", "top", "side", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019305.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302816, "question_id": "4QKmHFKVTW4zA7ahENAaDd", "question": "What is the person holding in the air?", "choices": ["their hand", "sword", "newborn baby", "rifle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302816.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350906, "question_id": "4QTm3yu3Z7GpcP3PUs8xhw", "question": "In what room is this dog located?", "choices": ["bathroom", "bedroom", "kitchen", "garage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537600, "question_id": "4QqBvRtEpQ2TznzozUiYvJ", "question": "What is different about this street sign compared to what we normally see?", "choices": ["shape", "font", "size", "color"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8242, "question_id": "4RHddBdcvVg6g6gRERwCjP", "question": "Which operating system is the computer connected to the wireless mouse and keyboard running?", "choices": ["chrome os", "windows", "linux", "macos"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008242.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561904, "question_id": "4S3X2nbTxAtz9i9RTkyKRA", "question": "From which video game console is the controller on top of the ping pong racquet used with?", "choices": ["nintendo wii", "nintendo gameboy", "sony playstation", "microsoft xbox"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235908, "question_id": "4S3jpMVKyQzgnuziRUBaHB", "question": "What kind of activity needs to be performed behind the giraffe?", "choices": ["plumbing", "burning", "landscaping", "planting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424540, "question_id": "4TDMeWLdv53KtspDniMjJ2", "question": "What is the big screen on the right?", "choices": ["gps", "tv", "microwave", "computer monitor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424540.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533432, "question_id": "4VTjsjznpSfdsgskbnenAV", "question": "What are the animals engaged in?", "choices": ["standing", "running", "hunting", "grazing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533432.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551290, "question_id": "4WA5iEMoz3d4mUrPgAodT3", "question": "What object is not in this bathroom?", "choices": ["toilet", "toilet paper", "book", "poster"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551290.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536537, "question_id": "4WTRY8nrhYAyuNF8KMoTMq", "question": "What is the same color as the longest stripe on the animal on the left?", "choices": ["black bear", "blue jay", "red robin", "yellow canary"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536537.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 437090, "question_id": "4Ws4AWRYmPG5L3wf9wQRDF", "question": "What type of energy goes through the tower right of the train?", "choices": ["mechanical", "heat", "electrical", "nuclear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000437090.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31290, "question_id": "4Wtwm8QizBNif5VHk3ngCz", "question": "What is the bear laying on top of?", "choices": ["pillow", "bush", "bed", "tree log"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031290.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570918, "question_id": "4XWrvVQShihTV27suWXhZe", "question": "What animal has wings similar to these boats?", "choices": ["pidgeon", "ant", "butterfly", "dove"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570918.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260079, "question_id": "4XfZFySGitCP7XdmSZKK7D", "question": "Who would usually wear the exact outfit the person is wearing?", "choices": ["nun", "santa claus", "army general", "student"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513644, "question_id": "4XnAcUZ6JxYAQUyhSzYj6A", "question": "What is in the foreground?", "choices": ["sheep", "stream", "crop", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513644.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458788, "question_id": "4ZhoWvmBhahDjVeZvzF5dJ", "question": "What type of area are these people walking in?", "choices": ["forest", "coastal", "arctic", "tropical"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458788.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 437090, "question_id": "4ZxDeJv7kN5tU4LWwc2pCE", "question": "What is the tower behind the train used for?", "choices": ["am radio", "cell service", "fm radio", "shortwave radio"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000437090.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434781, "question_id": "4aHzmmYeN9nUB5nQSoPcQZ", "question": "The animal here is unlikely to go far in which direction?", "choices": ["right", "left", "forward", "back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82723, "question_id": "4aZgN6UH3AUYEtEGbo8P2u", "question": "What is the man wearing on his head?", "choices": ["helmet", "beanie", "baseball hat", "chef hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59800, "question_id": "4b3xiN5bwUnsFqXZQnzPrk", "question": "What are the black lines for?", "choices": ["gas wires", "petrol lines", "electricity wires", "phone wires"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059800.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311364, "question_id": "4bZzzu3ydDSPe7Ua6dH4YQ", "question": "Where are the people located?", "choices": ["plateau", "rainforest", "savannah", "town"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412596, "question_id": "4bmunQDFoYCqKiAorCmudL", "question": "What kind of man-worn object is behind the cat?", "choices": ["socks", "heels", "flip-flops", "sneakers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412596.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9352, "question_id": "4cZLkwbu8THUcH8NQsEYfs", "question": "The item on the floor with stickers on it is usually found where?", "choices": ["baseball games", "airports", "circuses", "football games"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009352.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17837, "question_id": "4dBdEso8EhYLwCNF3xaV8X", "question": "What part of the animal is hidden by the hat?", "choices": ["antler", "trunk", "horn", "ear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61186, "question_id": "4dnRrchU8KvPtDhENrNEFw", "question": "What car manufacturer is advertised on his chest?", "choices": ["mazda", "bmw", "honda", "mercedes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061186.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417951, "question_id": "4fXBAWAPNCVBQWtenJ7qyH", "question": "The item the man has in his hand is similar to what other item?", "choices": ["discus", "pogo stick", "skates", "trampoline"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417951.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316921, "question_id": "4fYHK8EyVy427Eu4unpSXm", "question": "What is different about this zebra from most other zebras?", "choices": ["unstriped legs", "small", "colors", "tall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316921.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363021, "question_id": "4fZTpqqfK7GjoTvRCdrhzy", "question": "What allows the person shown here to avoid falling?", "choices": ["animals", "glue", "straps", "balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363021.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 313122, "question_id": "4gCaiQuWjpY7HSm3cjCkqJ", "question": "What level of expertise does this person have using the items being held?", "choices": ["expert", "intermediate", "professional", "beginner"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000313122.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 14409, "question_id": "4gXqMRpVEFpi5bB2QrJ6Np", "question": "What activity is this person engaging in?", "choices": ["ski race", "ski lesson", "recreational skiing", "summer olympics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000014409.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522141, "question_id": "4hGT9nUE4YTrRKEtWQMuVC", "question": "Who is the dog jumping on?", "choices": ["stranger", "dogcatcher", "veterinarian", "owner"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449583, "question_id": "4hk67BJ2hAdYQeoekKQfqu", "question": "What is the purpose of the object the woman is lying on?", "choices": ["eating", "washing", "sleeping", "storage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507433, "question_id": "4jJ7wmDFd4EPMgLen4yBUn", "question": "What is this type of breakfast dish called?", "choices": ["wrap", "bowl", "waffle", "omelette"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269025, "question_id": "4jLBWfsQm6ohUWL4xcunfQ", "question": "What is the temperature of the air outside the train?", "choices": ["mild", "warm", "freezing", "chilly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269025.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 106581, "question_id": "4jNkXHU52aiC7dTqAbursf", "question": "Where is this man skating?", "choices": ["parking lot", "skatepark", "back yard", "field"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000106581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476924, "question_id": "4jeXtbTr5r24i7qxVVxQe9", "question": "What material is used to make the track the train is rolling on?", "choices": ["bronze", "magnesium", "copper", "steel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476924.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154396, "question_id": "4kEJZhsbQrmQ7kRmDqV7ya", "question": "What food would this animal probably prefer?", "choices": ["lettuce", "eggs", "beetroot", "tuna"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296622, "question_id": "4kJpuLaeMgqBpaVD2QqtmJ", "question": "What type of tool is this?", "choices": ["power", "utensil", "hand", "gardening"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296622.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202930, "question_id": "4nPvCZ4yDMkknZmiGtgNet", "question": "What most likely caused the hole in the frisbee?", "choices": ["fire", "knives", "rocks", "dog teeth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202930.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433687, "question_id": "4nqDru7JKEGtHFZ4EeEd9N", "question": "The person who owns what will make the quickest time on their trip today overall?", "choices": ["motorcycle", "nothing", "bike", "oar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433687.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444938, "question_id": "4oPUWndNsjpbP9E3i4TKtC", "question": "Where is the bus currently parked at?", "choices": ["train station", "bus stop", "parking lot", "warehouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444938.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 383693, "question_id": "4oi7D6cksjoD5HigShPoCp", "question": "What is the person in control of this vehicle known as?", "choices": ["rider", "driver", "pilot", "chauffeur"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000383693.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186479, "question_id": "4orJvr7rMYZiCt2JAwqBpU", "question": "What has this person just done?", "choices": ["danced", "thrown frisbee", "fought", "stretched"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466778, "question_id": "4owPE7T7uwAqDePBiLWghf", "question": "What is unusual about this ski surface?", "choices": ["high", "sand", "loose", "crowded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22800, "question_id": "4pBAD2bAth62sQ9YhaCJX8", "question": "What type of animal is this?", "choices": ["domestic", "aquatic", "reptile", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022800.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56779, "question_id": "4qVMTNsmHYpDSeRmW4buDD", "question": "Which of these is not a likely time of day here?", "choices": ["sunset", "noon", "morning", "evening"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056779.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413437, "question_id": "4qyp8Np6CdxPUwcmQPvG2C", "question": "What activity is taking place using the reflected light?", "choices": ["cooking", "photography", "eating", "painting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413437.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108591, "question_id": "4rYtQmnpMtjBS5otsPXAGc", "question": "What motion is this animal known for?", "choices": ["slither", "fly", "swing", "trot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108591.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458063, "question_id": "4sThnaF4HUUM4cncAETypY", "question": "Where is sitting right under the remote?", "choices": ["bugs", "candy", "sheet", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466300, "question_id": "4trjToDcVadyUtZwyX7xky", "question": "The floor could best be described as being what?", "choices": ["ancient", "messy", "clean", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136080, "question_id": "4uyULkhyVa8Pj9W4qYEBPG", "question": "What is seen flying through the air?", "choices": ["kite", "eagle", "butterfly", "airplane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136080.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459829, "question_id": "4vd3Jobbivd2DxenVpwfom", "question": "The circular object in the road marks the entrance to what?", "choices": ["hideaway", "sewer", "mine", "dam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000459829.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193532, "question_id": "4wwS6rCSMpyVYhu3sHVtZr", "question": "What type of area is shown?", "choices": ["urban", "desert", "forest", "rural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193532.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155354, "question_id": "4xQp7LN9Uum7kHtaF3Lay2", "question": "What type of sound does these creatures make?", "choices": ["screech", "moo", "bark", "quack"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 106581, "question_id": "4xbPEeGguTBEZ9h2aeAXbC", "question": "The sun is closest to what body part of this person?", "choices": ["left foot", "right hand", "left knee", "rear end"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000106581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524224, "question_id": "4xfPBZwXAnjqvGH3oAoyZS", "question": "What is the animal on the left sticking out?", "choices": ["horn", "belly", "tongue", "antlers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524224.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4505, "question_id": "4yFKt6VLa4XV2cYBTPWRzi", "question": "What word can be spelled by removing one letter from the first word that appears on the bus?", "choices": ["hamburger", "bacon", "eggs", "potato"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265039, "question_id": "4yKqPyGAnmpfnSh8JH5q3o", "question": "Which item is most flexible?", "choices": ["yellow", "none", "blue", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265039.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501586, "question_id": "4yvq293THnzWThMow3QGFd", "question": "A group of these animals is called what?", "choices": ["herd", "school", "sleuth", "clowder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501586.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562216, "question_id": "52KKuoMeu5wgLjZX3W8yd9", "question": "How are trains here powered?", "choices": ["electric", "steam", "coal", "gas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562216.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76530, "question_id": "52dsL554rTipUjj9XknA6e", "question": "What is the hand feeding the elephant?", "choices": ["greens", "grain", "hay", "pizza"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164336, "question_id": "52euQpNV69L6np6TGXHKP7", "question": "Is the surfer surfing between two?", "choices": ["mountains", "dolphins", "swimmers or", "yachts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164336.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 282637, "question_id": "53xM6HnQbaofxXVT6a5gbB", "question": "How many different speeds does the bicycle have?", "choices": ["three", "one", "nine", "twenty one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000282637.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 88737, "question_id": "545S5yXE2FjpNCaJSYY8FR", "question": "What is this vehicle running on?", "choices": ["water", "rails", "air currents", "roads"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000088737.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347706, "question_id": "546JUKJ6fGssPniEzWVHiz", "question": "How does the man know the boy?", "choices": ["parent", "spouse", "neighbor", "teacher"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347706.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541419, "question_id": "55GzHQAT6p7LZAkJKhRNXS", "question": "This cake is likely for a person that works where?", "choices": ["animal shelter", "hospital", "fire station", "police station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 174269, "question_id": "56XbdU27LzMB9hho8igWpc", "question": "Why is he silting to the side?", "choices": ["confused", "falling", "going home", "maintain balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000174269.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 13207, "question_id": "56iunnqaEUkt3nSGJbuukT", "question": "What is the green stuff near the train?", "choices": ["grapes", "watermelons", "limes", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000013207.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400419, "question_id": "576XtqLE3z3D4zHdgXc8xT", "question": "What body of water is he in?", "choices": ["lake", "pond", "ocean", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544531, "question_id": "57GqYZ8S4JeQJECVCFL6P5", "question": "What is the bear doing here?", "choices": ["hiding", "resting", "eating", "posing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544531.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555868, "question_id": "57hD64HhUKQcuVvKWBtwcz", "question": "What basic home necessity is the store with the man in jeans standing selling?", "choices": ["cleaning supplies", "potable water", "cooking oil", "fuel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555868.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 425708, "question_id": "5A9tirmvFNnWoh4d3xdcTU", "question": "What will happen soon unless another coin is inserted?", "choices": ["ticket", "accident", "arrest", "tow"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000425708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321175, "question_id": "5AaP9eCJncw6mVopzMNX55", "question": "What might this animal most prefer to do here?", "choices": ["collect shells", "fish", "fight", "bathe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321175.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75796, "question_id": "5AeX7KF8sFgYttxxjBRW8s", "question": "What type of pizza is shown?", "choices": ["pan", "sicilian", "neapolitan", "personal pie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075796.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 78366, "question_id": "5BxHcQBFT3wFFb54A6v7TL", "question": "What is needed for this activity?", "choices": ["sun", "rain", "snow", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000078366.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339015, "question_id": "5CTaLRbdnNwZMTo9iW6ZHw", "question": "What is this species of bird known for?", "choices": ["running", "talking", "swimming", "dancing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339015.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431017, "question_id": "5DvArx2ku8UMMi6uHJKDYy", "question": "What style is the bed covering?", "choices": ["comforter", "quilt", "sheet", "blanket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431017.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65230, "question_id": "5EnrXUxFUzr237uuJk2rpP", "question": "What is near the brown table?", "choices": ["goggles", "sneakers", "boots", "women's shoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 238413, "question_id": "5Hb83Lnubt9NvypbqSRZBc", "question": "Why is the child wearing goggles?", "choices": ["eye protection", "spying", "night vision", "blindness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000238413.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344777, "question_id": "5HxGMpTxtmKZk9fcYdDBxW", "question": "What service department uses this yellow and silver item?", "choices": ["teachers", "police", "fire", "swat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345296, "question_id": "5J4ozSJ5HjitKNSMs2EhsF", "question": "What is putting the bird in danger?", "choices": ["trunk", "foot", "head", "stomach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345296.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157583, "question_id": "5JC2ChjZ9ttXgENwFE9BeS", "question": "What body part does this child view in the mirror?", "choices": ["leg", "hair", "arm", "teeth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124108, "question_id": "5JFuD6QAGCEXAYNRru9yiQ", "question": "What is the bird doing?", "choices": ["nesting", "foraging", "mating", "resting"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000124108.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19583, "question_id": "5Jkw7USe8Go9aZLydahmYR", "question": "What is the purpose of this mirror?", "choices": ["makeup", "car backup", "dentistry", "security"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259916, "question_id": "5KaCFVKmkdPvRyMtVS9f3A", "question": "Why is the stop sign on top of the side of the bus?", "choices": ["not visible", "not needed", "broken", "wrong bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259916.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179066, "question_id": "5LBRdBf6JfNVHeU6U9CeyD", "question": "What is needed for this activity?", "choices": ["ice", "snow", "sun", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464055, "question_id": "5LBSWSh2Cpcb8V6oTQztQ2", "question": "What is most likely nearby?", "choices": ["marina", "beach", "aquarium", "amusement park"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000464055.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461260, "question_id": "5M7cm2qrxpAk8Rzya9sUKM", "question": "What is the purpose of the numbers on the front of the train?", "choices": ["origin factory", "identification", "coordinates", "math problem"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461260.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3887, "question_id": "5Miit3PAFW8DysELGurKjo", "question": "What would cause the requirement mentioned?", "choices": ["accident", "pothole", "flooding", "red light"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000003887.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418631, "question_id": "5PB4HRuMmqUEZi2SidnGjR", "question": "If one of these creatures was attacked what would their best defense be?", "choices": ["running", "ears", "tusks", "trumpeting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447263, "question_id": "5Srn97JQ9n7gVAUMg96cFj", "question": "In what area of the house is the cat hanging out in all likelihood?", "choices": ["bedroom", "kitchen", "bathroom", "den"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449495, "question_id": "5SuqUjEpkZKz59CtMD6JQb", "question": "What language is beside the dolphins on the front of the bus in red?", "choices": ["greek", "cantonese", "japanese", "spanish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449495.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6063, "question_id": "5UcK9yL4uAf6t925MkBbeJ", "question": "When is it safe to cross this intersection?", "choices": ["tomorrow", "1 hour", "never", "now"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39829, "question_id": "5UwJuiDx2stgAycDvdoQ2r", "question": "What time of year is it?", "choices": ["spring", "summer", "autumn", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000039829.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283413, "question_id": "5VZgfzmUeTdTYazrUAPgwB", "question": "What is this person ready to do?", "choices": ["bathe", "eat", "sleep", "work"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283413.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404627, "question_id": "5VzmJNKmmBPuUUwgmS8MhK", "question": "What does this place most likely sell?", "choices": ["coffee", "video games", "shrubbery", "art"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545098, "question_id": "5WBCTkMp2phpHxu5TtNKcK", "question": "What device might this cat accidentally turn off with it's body here?", "choices": ["garage door", "oven", "tv", "stove"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545098.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460606, "question_id": "5WMxVsMqpXr4TxRhnFEV6z", "question": "How many trucks does Coke have?", "choices": ["2015", "5487", "1235", "1354"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460606.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273009, "question_id": "5WZ22DV3x4KqhRKn4a75JR", "question": "What can the drawings be described as?", "choices": ["impressionist", "graffiti", "surrealism", "cubist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273009.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314276, "question_id": "5Wq2VDUt7Sjip3oTvWXFJc", "question": "Why is the train stopped here?", "choices": ["train station", "no fuel", "for cleaning", "is lost"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000314276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 323643, "question_id": "5WwnEQikah4DKEkho2RkCd", "question": "What is the water closest to?", "choices": ["man's leg", "cat's tail", "umbrella", "man's head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000323643.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310488, "question_id": "5YZJr5M2M9CQn6gypKH25A", "question": "What style of bread was the sandwich built on?", "choices": ["multigrain", "pumpernickel", "white", "rye"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000310488.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301833, "question_id": "5aCWpnEt8djrjJJtEXPcbV", "question": "What board game is shown on the tie?", "choices": ["go", "chess", "clue", "checkers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301833.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90242, "question_id": "5atsfBBN3YHn6b8fbmApCe", "question": "What kind of elephants are these?", "choices": ["middle eastern", "tasmanian", "indian", "african"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090242.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 13245, "question_id": "5b8Ku7XjeVDRGDQyjSLu8d", "question": "What is located under the triangles on the wall?", "choices": ["dog", "box", "cat", "couch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000013245.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82156, "question_id": "5bNhEQyJy5Uy2rwiJKYk8r", "question": "Which one of these beverages could be useful for him?", "choices": ["chamomile tea", "water", "milk", "energy drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082156.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234868, "question_id": "5cuE82PrQZheXDnzTPuPUP", "question": "What unique feature does this animal have?", "choices": ["trunk", "hoof", "beak", "antenna"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000234868.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176103, "question_id": "5dWKxdkweD4SYcUjiybsrV", "question": "How do you know the cow is behind the car?", "choices": ["wall", "mirror", "tree", "window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176103.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404993, "question_id": "5e2HKYNGMX28QXfNwWcaA5", "question": "What might be causing the cows to run towards the camera man?", "choices": ["danger", "love", "confusion", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326675, "question_id": "5e7ZPcf8wcW9TimSxa7RZj", "question": "What is the purple item called?", "choices": ["zucchini", "onion", "plum", "people eater"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326675.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402925, "question_id": "5eU6aBmojtQemEPZQaYPjJ", "question": "People in this crowd are wearing hoods because of which weather event?", "choices": ["rain", "snow", "sleet", "fog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503377, "question_id": "5f4v97QVRuE9p42LBxWya7", "question": "Where is this toilet placed?", "choices": ["dollhouse", "car trunk", "store display", "garden"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350573, "question_id": "5gd7MUGn4ZTE9PWRXodXPP", "question": "Where is this dog located?", "choices": ["barn", "home", "office", "circus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19393, "question_id": "5gh6bNTShVcwepD8Whxoou", "question": "What part of this wave is this surfer in?", "choices": ["tube", "pocket", "lip", "shoulder"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000019393.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385630, "question_id": "5gvMB5qtzYPmCnvJeb8Yok", "question": "What type of environment is this sign most likely in?", "choices": ["park", "rural", "suburban", "urban"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385630.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73475, "question_id": "5hR8dAbL9uggcgtesnYLWJ", "question": "What is this baby animal called?", "choices": ["joey", "puppy", "cub", "calf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503708, "question_id": "5hYHZycrUwRdTtyKcgqT2N", "question": "What are the elephants satisfying?", "choices": ["hunger", "thirst", "cold", "fatigue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164055, "question_id": "5jH7oJhPo4vZBmN2zSHFqm", "question": "What feature does this animal have?", "choices": ["antlers", "wings", "tusks", "talons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164055.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 148080, "question_id": "5jMzMpD4XfBoNwUnMggwxd", "question": "Why is the polar bear mostly brown?", "choices": ["mutation", "tattoo", "radiation exposure", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000148080.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 538365, "question_id": "5kJGv6BhhjWv3WV4ttf6Kq", "question": "What is under the mustard on the hot dog?", "choices": ["ketchup", "sauerkraut", "hot peppers", "relish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000538365.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477971, "question_id": "5nkkDVW8gJhqUXEhk9BWaj", "question": "What is this bear trying to do?", "choices": ["drink", "rest", "swim", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234459, "question_id": "5oe2A8ZniBuCytgCg3QTbj", "question": "The orange item will help it do what?", "choices": ["fish", "control boat", "eat", "stay afloat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000234459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483542, "question_id": "5onDKPgCUaMCDMG6htrJ46", "question": "What is required for this activity?", "choices": ["snow", "water", "sun", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212468, "question_id": "5pTMJfCoqQ6DGhd3KeVAbh", "question": "Which one of these books revolved around this type of animal?", "choices": ["stuart little", "watership down", "charlotte's web", "black beauty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000212468.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95570, "question_id": "5pdZtKBWgkxVsVn3U6UeN3", "question": "What famous person was afflicted with the ailment the sign says?", "choices": ["helen keller", "stevie wonder", "stephen hawking", "marie curie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 143960, "question_id": "5pmL5JptVmBjRgp79BApmJ", "question": "This man most likely belongs to what ethnicity?", "choices": ["japanese", "lithuanian", "german", "korean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000143960.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532328, "question_id": "5q2LH8dKDFqARvd4dnKFzb", "question": "What is the mood illustrated on the TV here?", "choices": ["sadness", "glee", "ennui", "joy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532328.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374821, "question_id": "5q2WvcxSwksCNixzD7s8Va", "question": "What celestial bodies are displayed on this train?", "choices": ["sun moon", "mercury", "mars venus", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374821.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522578, "question_id": "5qcJpwFtSE6gU4rieEwyPE", "question": "What type of facility is the walkway leading to?", "choices": ["space research", "space museum", "medical research", "medical hospital"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431411, "question_id": "5rVbtALLmByasexUpK5Kgc", "question": "Which station comes before the terminal station?", "choices": ["hanover street", "arriva", "liverpool one", "79"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431411.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478151, "question_id": "5rvJeND79fpNa7wg7qNU59", "question": "What sex is the owner of the items shown here most likely?", "choices": ["third", "none", "male", "female"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200347, "question_id": "5sQEPjFdzXF28xGX597q5j", "question": "What is the large vertical object on the right of the image?", "choices": ["exercise equipment", "scale", "truck", "hanger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29971, "question_id": "5snnjb7fZuVeohQ3dm8Q3z", "question": "What paved area is directly behind the signs?", "choices": ["driveway", "parking lot", "sidewalk", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245541, "question_id": "5so3LSvCrjMF7FMoMdMSvs", "question": "Which one of these might be a category in this competition?", "choices": ["vault", "hurdles", "carpentry", "pastry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000245541.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147304, "question_id": "5spRGnATKUUrGHv3V8cz8k", "question": "What kind of area in the city is the man flying his kite in?", "choices": ["residential", "commercial", "industrial", "undeveloped"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522684, "question_id": "5tCGShTDTKWDgCgSFuVf9j", "question": "What number comes sequentially after the largest number on the round item?", "choices": ["13", "22", "four", "24"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522684.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386843, "question_id": "5tsirGZ5T2VoT4NHadmUkY", "question": "This scene is similar to what happening in sports?", "choices": ["strikeout", "snap", "home run", "jump ball"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000386843.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382776, "question_id": "5uDP2VsrRxHyAe368Yngk6", "question": "People often used these types of cases while traveling by what mode?", "choices": ["plane", "motorcycle", "ship", "bicycle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382776.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226098, "question_id": "5uRsZL49QvfR7r3ZyTF2Pi", "question": "What country does the above the ANA planes have its origins?", "choices": ["canada", "japan", "england", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226098.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379835, "question_id": "5ud3d83Gcw9r3nB6sMR7n3", "question": "How many years ago is the date on the hydrant?", "choices": ["ten", "twenty five", "one", "thirteen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374050, "question_id": "5vJSNxyA48nen5y8pfNwE3", "question": "What is required for this activity?", "choices": ["waves", "snow", "wind", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374050.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125294, "question_id": "5veZNqEWkGx3E3kbQs9G6v", "question": "What is the prolonged sleep of these animals called?", "choices": ["precipitation", "metamorphosis", "germination", "hibernation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125294.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422372, "question_id": "5wauAmdK754yAxCJUUxWgD", "question": "Which City uses this bird as a sports mascot?", "choices": ["saint louis", "washington dc", "new york", "kansas city"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000422372.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250661, "question_id": "5xSepseuhsv3tQ6FSFsuqe", "question": "What is the silver pipe located to the right of the bed used for?", "choices": ["plumbing", "cooling", "heating", "room decor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000250661.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395196, "question_id": "5z4KEnnxe4qWqdvCyLFupH", "question": "Which one of these Japanese companies is partial owner of this rail service?", "choices": ["mitsui", "toyota", "konami", "honda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395196.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355138, "question_id": "5zViyNiXyetvrVzJFe7eh3", "question": "What shape is the sign?", "choices": ["square", "hexagon", "circle", "rectangle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413670, "question_id": "62uiUGSxf7yhrVFL9zAFFL", "question": "What type of back end does the red car have?", "choices": ["liftback", "notchback", "fastback", "hatchback"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413670.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3257, "question_id": "63K8EVVW7B3cdWc5LtPUWG", "question": "Which part of this animal is likely to cause you to mess up work you are doing on line?", "choices": ["nose", "ears", "back", "paws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193532, "question_id": "64E3VtqvJ4VVRcBmXjdBTf", "question": "What kind of entertainment is behind the elephant?", "choices": ["zoo", "racing", "sports", "circus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193532.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20798, "question_id": "64eGXF3FNmgGR5WkHSoY8j", "question": "What will he use the white thing around his wrist for?", "choices": ["holding money", "wiping sweat", "signaling umpire", "checking time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391649, "question_id": "65rqQFh5x3bPKHUj6g5u9W", "question": "What is the stick used for?", "choices": ["clogs", "holding curtain", "getting up", "decoration"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 437046, "question_id": "66DpG28HjCn9mAQQ7nADGs", "question": "The colors on the front and bottom of the hydrant are found in the logo of what company?", "choices": ["taco bell", "mcdonald's", "chipotle", "popeye's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000437046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99702, "question_id": "66u4GxF2gJmwjG24PwX9Wb", "question": "This animal can harm people using what?", "choices": ["stinger", "claws", "beak", "horns"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099702.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357876, "question_id": "677ypf3RaNiSB6jNiwBhHt", "question": "How does this television receive a signal?", "choices": ["internet", "cablevision", "wifi", "over air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357876.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525620, "question_id": "68N27byLRQfH8tN7Krh6Az", "question": "What does the sign help someone find?", "choices": ["exit", "their car", "their dog", "their seat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525620.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 62268, "question_id": "69CJGpmYMhdTFmfaG3q898", "question": "What is being done to the dog?", "choices": ["delivering", "inspecting", "washing", "spanking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000062268.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400012, "question_id": "69CZMXCNGpYMKsJHfGSGat", "question": "The number on the bike is how far away from the number fifty?", "choices": ["nine", "two", "ten", "seven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400012.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453585, "question_id": "6AkGQCdzzB8Y4prwfwinXU", "question": "What information can one get from the building?", "choices": ["news", "location", "weather", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453585.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444567, "question_id": "6B7hdx5r9dEy9vCCejPYph", "question": "What type of recipe could this vegetable be used for?", "choices": ["cake", "soup", "sandwich", "pie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444567.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100007, "question_id": "6BDFFcsC9JnMKvu8qPgSZq", "question": "Where is this animal located?", "choices": ["wild", "beach", "zoo", "circus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100007.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163194, "question_id": "6BPrhtTUsEAB2f842gs5VU", "question": "During what time of the day was this bear photographed?", "choices": ["morning", "afternoon", "night", "noon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294345, "question_id": "6FNoDhS3aguMa5KfrfTSjR", "question": "What material is the wall made of?", "choices": ["glass", "wood", "tiles", "marble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294345.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395910, "question_id": "6FXhPCHMAKewSeknsqDtqz", "question": "This sports item is used to play which game?", "choices": ["cricket", "base ball", "tennis", "hockey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395910.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444707, "question_id": "6FdV7yUCr7QEoiBRYPj24Q", "question": "What allows this animal to keep their feathers dry?", "choices": ["slicker", "trees", "long legs", "hippos"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487684, "question_id": "6Fk3e8wYjHfedZAE7eUL9o", "question": "Consuming the items on this surface would be considered what activity?", "choices": ["eating", "drinking", "competing", "vaccinating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487684.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276844, "question_id": "6FwrfdDoshQzrLdRm9vmXp", "question": "This type of image is known as what?", "choices": ["painting", "comic", "composite", "panorama"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000276844.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153387, "question_id": "6GmjUgCM85uvUfjoStMAt9", "question": "What does this animal have?", "choices": ["long neck", "hooves", "trunk", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349983, "question_id": "6HGBZXqs2rKzdk4jjCobiQ", "question": "What kind of range is nearby?", "choices": ["mountain", "gun", "cooking", "shooting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349983.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136545, "question_id": "6HZS3Y3QhqhLvdxS8cexGR", "question": "What item makes up the red part of the food on the ground?", "choices": ["tomatoes", "pickles", "blood", "garlic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136545.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316523, "question_id": "6JRxcwGjFqPUVzYcui5Tui", "question": "What in this picture should not be there?", "choices": ["grass", "water", "rocks", "red chair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316523.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467531, "question_id": "6Jv9YjzKemYXUyLWoLa8J6", "question": "What is causing the waves that are being surfed?", "choices": ["ocean", "river", "wave pool", "flood"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000467531.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108469, "question_id": "6KGQ8oC5t4r5JEGgQk8N6E", "question": "What might happen to this bear if there is loud bass music playing?", "choices": ["crying", "singing", "eating", "movement"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297433, "question_id": "6KocEQnCzbyiCa7TRpPFgk", "question": "Which Italian food is depicted by the image?", "choices": ["ravioli", "stromboli", "spaghetti", "pizza"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488527, "question_id": "6LbxWExV6yUUyDPnv3tkRT", "question": "The cat is located in an apartment that is which type of climate?", "choices": ["tropical", "mediterranean", "temperate", "arid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488527.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347706, "question_id": "6MNridFy67zSJJG9hAS6WM", "question": "How does the person in the white shirt know the person in the green shirt?", "choices": ["coworker", "boss", "parent", "teammates"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347706.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45250, "question_id": "6N6VNaXU9i4kRK5X2Kt6y7", "question": "What would usually be placed in the upright device on the left side of the scene?", "choices": ["coins", "doll heads", "dollars", "credit cards"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045250.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356084, "question_id": "6NGjrQUZ54VwHV73AafgnQ", "question": "Thermoformed acrylic and porcelain-enameled steel is used to make what?", "choices": ["wash basin", "toilet", "bath tub", "shower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302466, "question_id": "6NkRPDsjRNBHzv6qzBm9sA", "question": "Where are the animals situated at?", "choices": ["zoo", "animal farm", "barn", "wilderness"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000302466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74108, "question_id": "6Pivpy53ZE3GzMjYdnS2GB", "question": "What country is this town in?", "choices": ["germany", "russia", "france", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074108.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472839, "question_id": "6QtXR7oTjv6sUNVGsNmP2z", "question": "What are the candles in the ice cream tub used to celebrate?", "choices": ["birthday", "christmas", "new year's", "thanksgiving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472839.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 58243, "question_id": "6R26R2qZ2qDW9DKUzrzgeu", "question": "How is this flower illuminated?", "choices": ["camera flash", "electric light", "moonlight", "daylight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000058243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97631, "question_id": "6RVzjWrV9gkq4uq3WVoqZM", "question": "How many giraffes do you see?", "choices": ["four", "one", "two", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386319, "question_id": "6RWvLgGLqwkdra9qyXL3VT", "question": "What kind of fabric is the women who is jumping wearing?", "choices": ["plush", "silk", "corduroy", "denim"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386319.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259978, "question_id": "6SHkyErvimqj9h7WYtgcxj", "question": "What color pants should he put on to match completely?", "choices": ["red", "yellow", "black", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259978.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510348, "question_id": "6SPUPs9fBSwcn4NxrFZPXi", "question": "What is on the wall behind the bear?", "choices": ["lock", "toy", "basket", "window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 530623, "question_id": "6SYwnfbYkEx6vxaUSoAKaZ", "question": "How were the steps treated?", "choices": ["paint", "chalk", "stain", "varnish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000530623.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195498, "question_id": "6Svz9dAg9sYhCC97xytpqH", "question": "What sport are the men playing?", "choices": ["disc golf", "european handball", "cricket", "rugby"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275447, "question_id": "6Tcaf3fPknVkEu4CTxHVBQ", "question": "What is utilized by the person with a helmet to slow their progress?", "choices": ["nothing", "rein", "whip", "saddle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53276, "question_id": "6TosFsTcjmh2KcqtMB2dUK", "question": "What is the green box being used for?", "choices": ["stand", "storage", "dinner plate", "dog bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21371, "question_id": "6TpQDrZkE3JCCjFXzUVNTi", "question": "What sport is the boy taking part in?", "choices": ["skating", "skate boarding", "snowboarding", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334528, "question_id": "6VGYBbai3zM8fDH3A8jaeh", "question": "Where might you look in this room if you have a headache and need aspirin?", "choices": ["under tub", "under sink", "in tub", "behind mirror"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334528.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172444, "question_id": "6WrsAEow2tpawVSogmrZPL", "question": "Why is there an object in the toilet?", "choices": ["spying reasons", "criminal reasons", "cleanliness", "monitoring"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000172444.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118782, "question_id": "6WzhRTRuEPxuhf5EJ23Ru4", "question": "Which one of the cities named Portland is shown here?", "choices": ["oregon", "georgia", "maine", "connecticut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118782.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 128016, "question_id": "6X4mGyBV9cadCZVqYjRrgd", "question": "Which of these food groups is unrepresented on the plate?", "choices": ["fruit", "protein", "grain", "vegetable"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000128016.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454012, "question_id": "6XJMs9UipW7ev4bq7Di6y4", "question": "Where is this bear located?", "choices": ["museum", "circus", "zoo", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454012.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31862, "question_id": "6XNVPuwZhYF5BVv2de4473", "question": "What is out of place in this photo?", "choices": ["floor", "beer can", "liquid", "toilet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031862.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 274928, "question_id": "6XuzKiaC2cfcyw4VZBwiVp", "question": "What are on both sides of the large black item?", "choices": ["cats", "mirrors", "pikes", "shovels"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000274928.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308640, "question_id": "6YeRJw7b75zqQax2H8YAbe", "question": "The message is most likely from who?", "choices": ["cousin", "government agent", "brother", "scammer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74971, "question_id": "6YnoTK8geLpVPxTeTvZoSm", "question": "What are they doing?", "choices": ["praying", "playing stickball", "fighting", "cleaning beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309837, "question_id": "6bvvSwzRZMGBe2wZos56WN", "question": "What is the person shown here doing?", "choices": ["escape", "right click", "left click", "back up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306907, "question_id": "6bxyKsmThnD3h2LidtyvCF", "question": "What is the man wearing on his head?", "choices": ["helmet", "baseball hat", "chef hat", "fedora"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409677, "question_id": "6djypkiLxxK4JrDiFLzqL7", "question": "What name describes a different type of these vehicles?", "choices": ["sedan", "catamaran", "pickup", "tank"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409677.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184928, "question_id": "6eM62J39BtSYXJF686RTTk", "question": "What animal is shown on the computer screen?", "choices": ["gray squirrel", "tree weasel", "fruit bat", "flying squirrel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184928.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252697, "question_id": "6ehJxXSuixgTroEJqgYu6x", "question": "What name is given to this type of plane?", "choices": ["jet", "boeing", "helicopter", "crop duster"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000252697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295452, "question_id": "6epyFo4EwDUohrRzF2rZ2P", "question": "What is the profession of this woman?", "choices": ["dentist", "athlete", "janitor", "doctor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000295452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85736, "question_id": "6fNyjqn7ghFxmVnCY923Ws", "question": "What fruit grows on this stalk?", "choices": ["cherries", "bananas", "berries", "coconuts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085736.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130067, "question_id": "6fXMVAt5ZZFYbzPMaCsbv4", "question": "What is the most conventional place to practice this activity?", "choices": ["river", "pool", "lake", "ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11389, "question_id": "6fZTNzKjZ4pUzUQdS79kKE", "question": "What is the landscape behind the train cars?", "choices": ["forest", "desert", "swamp", "tundra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000011389.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61458, "question_id": "6fmok8pmQDSTmjhH3smY7L", "question": "What animal is usually found in this kind of environment?", "choices": ["camel", "dolphin", "polecat", "ant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061458.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283235, "question_id": "6fr3MUBwEMjaux5PUaFZBT", "question": "The brand of these phones is written where?", "choices": ["right side", "left side", "top", "bottom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355148, "question_id": "6hYZzHM4UJ2PRyGZEGLv37", "question": "What purpose do the glasses of the man on the right serve?", "choices": ["vision", "protection", "style", "endurance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355148.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 479224, "question_id": "6iDErfzmGYSfbGVaV5BnkX", "question": "What is the girl about to try to do with the tennis ball?", "choices": ["serve it", "miss it", "return it", "smash it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000479224.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532870, "question_id": "6iKikAgNSwby7haGocuW5a", "question": "The first two number of identification tag denotes what?", "choices": ["birth month", "own number", "birth year", "birth date"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000532870.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126220, "question_id": "6iTHxVAcrZF4LJUVQAdczj", "question": "What is a glaring issue with this bathroom?", "choices": ["sink faucet", "very narrow", "very wide", "no bathtub"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000126220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162187, "question_id": "6iWRgWb8VJG7uqt7D84m5F", "question": "What hour of the day is more closely signalled behind the train?", "choices": ["900", "1100", "1000", "800"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162187.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336184, "question_id": "6iydASGQkmUjPDySpYhQ6G", "question": "What is used to attach the design to the wall?", "choices": ["ropes", "tape", "honey", "paste"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436364, "question_id": "6izhm3ECPTh8R2RmXh7ksN", "question": "Why is the person wearing the hoodie?", "choices": ["parachute", "fashion", "cold protection", "aerodynamics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36369, "question_id": "6jTNurCnZjkDxhgN7v9VQR", "question": "These animals are most likely where?", "choices": ["highway", "jungle", "zoo", "rainforest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036369.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5118, "question_id": "6jpFaHiQEsmntVkEbBonph", "question": "Why is the back of the bus blurred?", "choices": ["broken camera", "bad cameraman", "bad film", "moving fast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271811, "question_id": "6jvpVs9SLZCNnjCVpK9LE3", "question": "What season is occurring?", "choices": ["winter", "fall", "spring", "summer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271811.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189823, "question_id": "6kibztZPFNXPmFCVMhLCkz", "question": "What does this business repair?", "choices": ["watch", "television", "auto", "microwave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475246, "question_id": "6kqc3XaDFbTAgFDkNdmp5C", "question": "Over how many meters tall is this animal?", "choices": ["15", "ten", "2.5", "five"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000475246.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343340, "question_id": "6mfJn53iKrmdXrDqxHWscC", "question": "The yellow sign discourages what?", "choices": ["fruit picking", "speeding", "pan handling", "flying"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483194, "question_id": "6o9LLkgNEGHZ6AgJvpQUs2", "question": "What is the type of meal that the chef is most likely creating?", "choices": ["soup", "casserole", "pan fry", "sandwich"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 392420, "question_id": "6of5opQNZ52aSZDCtuV2g2", "question": "What visible item is floppy?", "choices": ["carrot", "disk", "bunny ear", "dog ear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000392420.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283971, "question_id": "6ohfHQtERVQmw9omw58gbu", "question": "What makes going through the small wire fence here so unattractive?", "choices": ["electrocution", "barbs", "smell", "noise"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539281, "question_id": "6orGnF8e54WigKvAVBFYNy", "question": "What breakfast food is shown?", "choices": ["pancakes", "yogurt", "bacon", "waffles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539281.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542293, "question_id": "6pLSZWuq9FuJfjsYaZbWcA", "question": "What clothing item is this person likely checking out in a mirror?", "choices": ["glasses", "necktie", "collar", "belt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542293.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500470, "question_id": "6pbDYRWuZrrVBSyokxiNf6", "question": "Why are the people above wearing helmets?", "choices": ["decoration", "swag", "protection", "fun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500470.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41579, "question_id": "6qfmRCcdeiuMdrbLHzz2Ra", "question": "What is the purpose of the prongs on the device?", "choices": ["scratch scalp", "prevent electrocution", "stop static", "better grip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000041579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189276, "question_id": "6rBZQSiBFc64Qcd3okbJhR", "question": "What are the birds doing on the object?", "choices": ["hunting", "eating", "sleeping", "perching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158121, "question_id": "6saYSo9vossNCYaYyD9yfg", "question": "How is the slice of pizza being illuminated?", "choices": ["camera flash", "daylight", "electric light", "moonlight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158121.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500551, "question_id": "6t278kmqdNFAzXDpE2Q88a", "question": "Which one of these professionals probably put that bandage around her leg?", "choices": ["oncologist", "meteorologist", "pediatrician", "sports doctor"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000500551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 278466, "question_id": "6tDdRg9f5SbymYJLEV3KbA", "question": "The people in the ocean are performing which sport?", "choices": ["boogie boarding", "water polo", "surfing", "skim boarding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000278466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384279, "question_id": "6urLrcjhqnTndVwppNDoSs", "question": "What action is being taken?", "choices": ["paint", "brush", "cut", "serve"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384279.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36302, "question_id": "6ustUnpndrqiHesiLvagzZ", "question": "Where is the photographer located?", "choices": ["in car", "on car", "before car", "behind car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036302.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192315, "question_id": "6v2wPpaYR6qyRh6kUfrTwU", "question": "What kind of animal is shown?", "choices": ["reptile", "aquatic", "domestic", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20602, "question_id": "6v88uyKuKmAXsdDGu5LBMv", "question": "What religion uses buildings like these as a place of worship?", "choices": ["judaism", "islam", "buddhism", "christianity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020602.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191410, "question_id": "6vKuiQa5xLFR2WQmo6p9Ud", "question": "Which country is the flag on the truck?", "choices": ["united kingdom", "cuba", "united states", "chile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191410.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550083, "question_id": "6vTyc4jrJQdPfw3WwwFsgD", "question": "What is the person in danger of doing?", "choices": ["sinking", "wiping out", "bottoming out", "capsizing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550083.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337913, "question_id": "6w7yQNahF29UWmNVQ5AwfM", "question": "The person is trying to maintain what?", "choices": ["checkbook", "marriage", "dignity", "balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337913.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 497282, "question_id": "6xKGPyFrqeep7ekCC8un5u", "question": "The large gray pole in the back here supports what?", "choices": ["ski lift", "ad signage", "light pole", "phone pole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000497282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498170, "question_id": "6zGaxPti8qJd5Pf6GwNQwd", "question": "What is an action that this animal is known for?", "choices": ["gliding", "grazing", "swimming", "hunting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498095, "question_id": "6zTcvh7by9p4rS45MNiBGm", "question": "This famous streets belongs to which city?", "choices": ["hobart", "darwin", "sydney", "melbourne"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498095.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476695, "question_id": "73Eed7mEGuadHBMnnyLoiv", "question": "What is the weight of the horn does this sheep has?", "choices": ["30kg", "20kg", "14kg", "10kg"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000476695.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474793, "question_id": "73pPuSGPt3KsjdfAHYrqQp", "question": "Why are the bears colored differently from their mother?", "choices": ["are dirty", "are younger", "camouflage", "different parents"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474793.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262695, "question_id": "75xpSsBGiU6QLVoVaK4gZ8", "question": "What does the man have on?", "choices": ["armor", "fedora", "scarf", "hard hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262695.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7113, "question_id": "763idSngTiP5hz6fbtWyBS", "question": "What is the wall painting supposed to depict?", "choices": ["grazing land", "ocean", "rainstorm", "mountains"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000007113.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365639, "question_id": "76FbqARNetMhYtespqsdAN", "question": "What state would you be going to if you took exit 304?", "choices": ["wyoming", "nevada", "colorado", "utah"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365639.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226138, "question_id": "77ccic3h2nQf74kiCxdffo", "question": "What is the largest number that is written on the boat atop the truck?", "choices": ["nine", "eight", "two", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178698, "question_id": "78jweZcqHQfYDZMvgKrLhP", "question": "What is affixed to the front door of the refrigerator and freezer in the kitchen?", "choices": ["cabinet door", "paper", "stickers", "magnets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178698.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223938, "question_id": "78r6Szw3virvGtnKbchGod", "question": "What is the person in red doing behind the bench?", "choices": ["running", "sitting", "walking", "bicycling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223938.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79571, "question_id": "79AGSipjxBbzqYXuDTuveu", "question": "The owner probably uses this animal for what?", "choices": ["caviar", "pork", "milk", "eggs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488091, "question_id": "79NY9q52v43xEaTwf98hY6", "question": "What part of the woman on the wall is covered in cloth?", "choices": ["wrists", "arms", "eyes", "fingers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488091.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427267, "question_id": "79jsapw35rVxWxjESHEoH3", "question": "What is on the back of the large red arms that come forward and down on the front of this bus?", "choices": ["hands", "mirrors", "phones", "suckers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415695, "question_id": "7AXr97YG9P23naTyMgsWTD", "question": "What shapes are seen on the screen of the laptop?", "choices": ["triangles", "rectangles", "squares", "circles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415695.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 166111, "question_id": "7BeNexcbfRAGMjSc6WMNrS", "question": "What venue is the animal in?", "choices": ["zoo", "elephant barn", "wilderness", "park"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000166111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139968, "question_id": "7BfvYYurYELG6zy5SQgFHq", "question": "What room might this person be in?", "choices": ["kitchen", "office", "bathroom", "game room"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 248248, "question_id": "7CTk5UPhiWvyBojbv4moPC", "question": "What item comes from this animal?", "choices": ["wool", "cheese", "silk", "ivory"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000248248.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544033, "question_id": "7CeVV4oCSTd8qfNvTepWih", "question": "What type of craft is being done?", "choices": ["quilting", "leatherwork", "embroidery", "ceramics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544033.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400424, "question_id": "7CoKyeiTYGnW8vxWtjTTPo", "question": "The pipe seen open at the base is used for what?", "choices": ["drainage", "garbage disposal", "pneumatic chute", "stop sign"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400424.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303460, "question_id": "7D2KRJkJa76MbzwfSu5EuH", "question": "What is stuck to the leftmost wall?", "choices": ["poster", "faucet", "nail", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32127, "question_id": "7DuAKHJzufqBDULEaAHq6c", "question": "Who is taking a photo of this bike?", "choices": ["passer by", "no one", "train conductor", "owner"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137180, "question_id": "7F7tY6nzycEQgGgDPq2oNN", "question": "What kind of game is pictured above?", "choices": ["skiing", "walking", "gliding", "snow boarding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137180.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267171, "question_id": "7FHjtjnSSuCC5oNmrryN55", "question": "What kind of zebras are these?", "choices": ["wild", "livestock", "captive", "house pets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267171.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363235, "question_id": "7GKarjvr9TFXwGa47R8rq9", "question": "What time is it?", "choices": ["345", "1045", "11", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180709, "question_id": "7HY4cMzuyarDLCBnmDwRwu", "question": "What is the man cleaning?", "choices": ["ears", "teeth", "fur", "eyes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 331838, "question_id": "7JN5XmMGfccNuqdWRf4Com", "question": "What is the giraffe on the left inspecting?", "choices": ["logs", "food", "giraffe", "salt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000331838.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288508, "question_id": "7KMGFDfxZhi8hNQLQB277a", "question": "How has this sandwich been prepared for serving?", "choices": ["cut", "shredded", "poured", "scooped"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198908, "question_id": "7L6MLgtThr6dYaAfqJEaDZ", "question": "What type of rocks are used in the pot?", "choices": ["granite", "cement", "pebbles", "slate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321671, "question_id": "7LnHeewD27N8BtsPTnbXNe", "question": "What is sitting next to the laptop?", "choices": ["drink", "food", "vitamin bottle", "book"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321671.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34914, "question_id": "7N6C7bLzgw5ahUFKHFyK9L", "question": "What type of flower is in the vase?", "choices": ["tulip", "lilac", "sun flower", "rose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000034914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560203, "question_id": "7PNaCtHLHinGRSAHmat8w6", "question": "Why is the man carrying poles?", "choices": ["balance", "fashion", "visibility", "self defense"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560203.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342764, "question_id": "7Phuhjzqdv62HRLQdAuk4r", "question": "What is the bread shown here made to resemble?", "choices": ["cats", "dogs", "puppets", "emoji"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342764.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243257, "question_id": "7PiJXyjCUpBvH4gE7ACP4Q", "question": "What are the bands on the man's wrist for?", "choices": ["circulation", "sweat", "carpal tunnel", "fashion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 23578, "question_id": "7QuDnomts2gqdpUjivJnY6", "question": "What body part is likely to hit the ground first?", "choices": ["left hand", "right hand", "butt", "head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000023578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 148326, "question_id": "7QvFm9bnP4ge55ywSiQgK7", "question": "Where is this elephant located?", "choices": ["desert", "jungle", "zoo", "circus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000148326.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310657, "question_id": "7RJgSAZdicUSoeGopw2NhB", "question": "What is the station code for this train's destination?", "choices": ["cor", "con", "coy", "crn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 18646, "question_id": "7RKJeKaqd7QQYVxRZoLMh8", "question": "This animal has a very long what?", "choices": ["snout", "talon", "tail", "neck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000018646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366228, "question_id": "7RuXUTpRnHGSnp7QS2DhQu", "question": "In which continent is this train station located?", "choices": ["north america", "asia", "africa", "europe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482640, "question_id": "7SwHQpaUk7GgTvDr8MFmRF", "question": "The player represents what nation?", "choices": ["denmark", "france", "canada", "brazil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66044, "question_id": "7TSaah6ria3NWkUMXSsPdf", "question": "What is the little kid sitting in?", "choices": ["rocker", "highchair", "booster seat", "stool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314102, "question_id": "7TbHWCyhRyJoyyBYTV49Gt", "question": "What is this person about to do?", "choices": ["strike out", "nothing", "miss ball", "fall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314102.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435831, "question_id": "7U3NdBsDuddu4GZ9fDv5PL", "question": "What did the other team probably do to make the pitcher look down at the ground?", "choices": ["score", "swing", "taunt", "strikeout"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435831.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358914, "question_id": "7U64kLbQfJLjNkK8KqBY8j", "question": "What purpose involving the tree does the wire around it serve?", "choices": ["cammo", "bird perch", "art", "protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158069, "question_id": "7UbBxSxaPoQwq3owmGjhnN", "question": "In which country is this double-decker bus operating?", "choices": ["america", "canada", "australia", "united kingdom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487273, "question_id": "7W4YmmHyZEnrA37mnZwHHc", "question": "What type of animal is seen?", "choices": ["aquatic", "flying", "insect", "reptile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487273.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541890, "question_id": "7XCG7ZHBrmwXYSENN9nSNk", "question": "What form of payment is accepted here according to the sign to the left of the door?", "choices": ["credit cards", "checks", "bitcoin", "bartering"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541890.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552232, "question_id": "7ZLdEj5dHos4tiAoHXyQFp", "question": "What brand are both remotes?", "choices": ["jvc", "sony", "samsung", "bose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552232.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 284952, "question_id": "7aLoqB9E9F989RUJF6p9Nw", "question": "What type of sign is shown?", "choices": ["warning", "directional", "brand", "sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000284952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298244, "question_id": "7ahdszkj7LvTdec74pvdH3", "question": "What is the paper in this room used for?", "choices": ["writing", "reading", "wrapping", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298244.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 564054, "question_id": "7bW2LPP8udRk5D9NXfrUDk", "question": "What likely made the path in the ground?", "choices": ["animals", "cars", "shovel", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000564054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477393, "question_id": "7cKMod8rvH5zY9HeoDmsN5", "question": "How does this animal get around?", "choices": ["fly", "hop", "swim", "slither"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477393.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 43906, "question_id": "7dTT3ySgvCqrgjSHbQrx9V", "question": "What material is the coat made of?", "choices": ["wool", "yarn", "cotton", "denim"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000043906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186307, "question_id": "7dU88MpSNnCykBxMnjfXzS", "question": "What is next to the stuffed animal?", "choices": ["cat", "toy", "plant", "baby"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186307.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463181, "question_id": "7edMbpJBSqbH9HmPW9v2if", "question": "Why is he holding the board like that?", "choices": ["showing off", "hiding it", "for sale", "keep dry"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000463181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519167, "question_id": "7ehQafc8FWc36BLCkhkRir", "question": "How was the egg cooked?", "choices": ["poached", "soft-boiled", "fried", "hard-boiled"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519167.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411306, "question_id": "7fAsJH2JSYDiQXDU9CWbHV", "question": "Which species feet is more visible here?", "choices": ["rat", "human", "bird", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98774, "question_id": "7fq44oVbQ5juVwZe4wbWrG", "question": "What type of basket might the little chick be found in?", "choices": ["picnic basket", "sewing basket", "knitting basket", "easter basket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098774.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 489441, "question_id": "7gLruQ2akg6sssZpL9TdKk", "question": "What is the silver plate on the left made from?", "choices": ["glass", "metal", "wood", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000489441.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575313, "question_id": "7hAv545FF3EmgAZ5Yc5oev", "question": "The item the doll is holding is related to what profession?", "choices": ["baseball player", "carnival barker", "police officer", "dentist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575313.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80851, "question_id": "7hEwQupXuEJSBEXojggavj", "question": "Where is this bathroom located?", "choices": ["office", "mall", "home", "store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080851.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535616, "question_id": "7hgGBbY9nPTTbvyMiC8mbL", "question": "Why is that substance spread on the pan?", "choices": ["add flavor", "prevent fire", "prevent sticking", "make crispy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535616.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127946, "question_id": "7hv6u5R79CXbmmzrXpzvkg", "question": "Which of these mammals do these animals most resemble?", "choices": ["dogs", "whales", "horses", "walruses"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379200, "question_id": "7iAkHXnSrCVhQ8ew5yQ9Lv", "question": "What is the only direct selling company that has more representatives than this one?", "choices": ["amway", "herbalife", "mary kay", "tupperware"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379200.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175183, "question_id": "7j3V4TRxFPkEGNxGphAZrz", "question": "What is this train most likely carrying?", "choices": ["tourists", "livestock", "passengers", "medical supplies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175183.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 225114, "question_id": "7k7KkXtttJ7LRhojPgF2NW", "question": "Where is the food located?", "choices": ["refrigerator", "mouth", "hand", "table"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000225114.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410941, "question_id": "7kDwoZnN4asbgvCiMx3yzm", "question": "What is needed to play this sport?", "choices": ["basket", "stick", "racquet", "bat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410941.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226110, "question_id": "7kpp9qaZbDwyLfTVtE3qYe", "question": "What will the bear likely do next?", "choices": ["run", "drink", "wake up", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226110.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194512, "question_id": "7mDfFXZA6SpR4G6tdAjaK4", "question": "What is the food in the shape of?", "choices": ["clock", "swan", "spaceship", "bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000194512.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466983, "question_id": "7mNPmvm9DKC8KiYckQ6PeP", "question": "What kind of area is shown?", "choices": ["commercial", "residential", "arctic", "desert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466983.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136481, "question_id": "7ooVj5f6csX7SG4zvQYbSW", "question": "How does it feel to ride the skateboard here?", "choices": ["bumpy", "smooth", "jagged", "slippery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136481.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137978, "question_id": "7pXmy3AmKnBkrDzYppwWeR", "question": "What is the profession of the person who sell this item?", "choices": ["teacher", "janitor", "athlete", "florist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137978.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38687, "question_id": "7pZMpftnTFEE4R2un3tW8g", "question": "The symbol on his shirt is said to represent what?", "choices": ["jezebel", "azazel", "baphomet", "delilah"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038687.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540289, "question_id": "7pgBTsdfjsiYgxkYC5gfMA", "question": "What state owns this department store?", "choices": ["qatar", "bahrain", "saudi arabia", "dubai"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000540289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186590, "question_id": "7phbf32T6cCeGQBY5BhGUM", "question": "What vegetable is missing from the food on the left that was used on the food on the right?", "choices": ["carrot", "beet", "squash", "tomato"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537997, "question_id": "7q9BbYrpoE9YbiVJ2ttB4L", "question": "Why is the left elephant smaller?", "choices": ["malnourished", "genetics", "female", "younger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537997.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451685, "question_id": "7qFTQF3j9XTUbbYuVLYUAL", "question": "What usually touches the white item?", "choices": ["bread", "humans", "cows", "flowers"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000451685.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579040, "question_id": "7qy28qTPgrKTMHiULbnoDT", "question": "What type of license does the driver of this vehicle need?", "choices": ["cdl", "motorcycle", "school bus", "auto"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579040.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262854, "question_id": "7sQFnQpTx97nbopbr6HYrA", "question": "What does the yellowish sign imply one should do when driving here?", "choices": ["slow down", "speed up", "aim hitting", "halt"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000262854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366552, "question_id": "7tn2vSFsc3BWLKdGz7jof4", "question": "What activity is the animal's owner likely to be able to do here based on the objects around?", "choices": ["hair cut", "run", "web browse", "shower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366552.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 266699, "question_id": "7uCHteyGXfGwHMRUf5UcR4", "question": "What is the same color as the jacket that the man has on?", "choices": ["cherry", "strawberry", "banana", "blueberry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000266699.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513753, "question_id": "7uEAhk8qAawUAXewiCUerP", "question": "Why is he holding him?", "choices": ["pushing", "apprehending", "helping", "afraid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362443, "question_id": "7v9JMYuhUEhU7bFefB3hBj", "question": "Which direction would you need to go to reach the Cliff House?", "choices": ["west", "south", "north", "east"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 370221, "question_id": "7vCi2VwESXTaT6PmupKacx", "question": "What is the fence made of?", "choices": ["wrought iron", "brick", "barbed wire", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000370221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107266, "question_id": "7xXb3oQeNUyU2zMgvS5ZTh", "question": "This train connect which major cities?", "choices": ["germany", "us", "italy", "canada"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000107266.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473535, "question_id": "7yHA7CzHQNLfpP4BqrvG7C", "question": "What is the man wearing?", "choices": ["snowsuit", "motorcycle suit", "work jumper", "firefighter suit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196788, "question_id": "7zfYy9b53BYZJK2QMBAHCS", "question": "What was the silver item to the right used for?", "choices": ["holding", "slicing", "eating", "oiling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196788.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71632, "question_id": "827NgQhEHAjo3mstFXhsKd", "question": "What kind of environment are the bikers in?", "choices": ["plains", "steppe", "mountainous", "savanna"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071632.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171332, "question_id": "82QDA8num3497V8dhepEqF", "question": "What kind of fish is depicted on the bears shorts?", "choices": ["angel fish", "tuna", "salmon", "gold fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171332.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175062, "question_id": "83R7PoHEF4Wog68UvxroNk", "question": "Which shape cannot be found in the design of the clock tower?", "choices": ["rectangle", "circle", "octagon", "triangle"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000175062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299989, "question_id": "84CiD4zKDxXB9jWQxttPT4", "question": "What Fahrenheit value must the weather be for the stuff around the stop sign to form?", "choices": ["below 32", "244", "100", "above 32"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299989.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95642, "question_id": "8593uDiP3oEqqnAs7kYaxr", "question": "The tool she has will eliminate what?", "choices": ["lint", "lice", "tangles", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095642.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446071, "question_id": "85DPbJJW9zqBdCtFzc8ZPM", "question": "What items might be in the case?", "choices": ["makeup", "cleaning supplies", "tackle", "tools"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000446071.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410714, "question_id": "86ZELf36SwPhiK4zdWm2SV", "question": "What weather is occurring?", "choices": ["overcast", "rain", "clear", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289843, "question_id": "872aByHNXEVRRDX9LqTS4S", "question": "What spice is associated with the yellow flowers seen here?", "choices": ["radish", "mustard", "saffron", "soy sauce"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000289843.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178428, "question_id": "87hhVjbyMh6VcsQLhMvCLC", "question": "By which leg might the surfboard be held if the surfer here falls?", "choices": ["right", "left", "none", "both"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178428.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520498, "question_id": "88ETmaShueW9TYsYAHRDwN", "question": "What is the state of the man?", "choices": ["tired", "relaxed", "euphoric", "elated"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324216, "question_id": "8928tMkAoTshshZUpJjipJ", "question": "What is the ring on the wall used for?", "choices": ["robes", "toilet paper", "towels", "decoration"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000324216.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138718, "question_id": "8ASqNSW5yAY7HJQfMyrAu9", "question": "What word in the photo scares you?", "choices": ["danger", "ice", "blood", "horror"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138718.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206517, "question_id": "8BjebRYppShxMye3RqPXyi", "question": "Which teddy bear is dressed as a famous landmark?", "choices": ["middle 1", "bottom 2", "bottom 1", "top 1"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206517.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541795, "question_id": "8ByYGBEwdyyiQCtpKbS4aA", "question": "What is the person doing?", "choices": ["snowboarding", "ice skating", "skiing", "sliding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541795.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427735, "question_id": "8CupNnbc5rxSKHphkVcCk4", "question": "Where is the man walking?", "choices": ["space", "roof", "driveway", "beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65142, "question_id": "8EMzgPKrRV6KmE88KSwiDf", "question": "What is flying in the air above the vehicle in the desert?", "choices": ["flag", "parachute", "balloon", "kite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065142.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518963, "question_id": "8ERq2vqEVgKB89LCq3MoDC", "question": "The person here is likely what sort of enthusiast?", "choices": ["reading", "writing", "photography", "singing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19163, "question_id": "8EUnHLo32fUhECSw2ojvLm", "question": "What vegetable tops the foremost meat here?", "choices": ["carrots", "radish", "cabbage", "beets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019163.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297433, "question_id": "8Ebj9dNGsq5Ges2tW47sT4", "question": "The calorie content of this dish is probably what?", "choices": ["moderate", "high", "low", "zero"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30268, "question_id": "8EgL896emr48N25g4H8Qke", "question": "What is the predominant shade of the clock tower?", "choices": ["orange", "grass", "gold", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030268.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 48875, "question_id": "8EjeKLvgvWx2bmsS77oxko", "question": "What hairstyle does this person have?", "choices": ["beehive", "pony tail", "pig tails", "crew cut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000048875.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460521, "question_id": "8F8rhgSqjNBo3BMt3WBu6f", "question": "How many states does the nation whose flag is displayed have?", "choices": ["28", "30", "50", "14"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19315, "question_id": "8FzC6GVCQkuuhTe2ZmvcQS", "question": "What is the water touching besides the boat?", "choices": ["shark", "human", "tree", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 425217, "question_id": "8G88q4ZeWc9FntqAuaHXmm", "question": "What is the air temperature surrounding the brick building and clock tower?", "choices": ["cold", "warm", "chilly", "freezing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000425217.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77250, "question_id": "8GagrzjD2JTyL99VgfHRZ2", "question": "What type of lines are shown?", "choices": ["waiting", "computer", "electric", "fishing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077250.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 236899, "question_id": "8GjJiuwfpVsLAxP3Ffm8Q2", "question": "What is aboard this vehicle?", "choices": ["packages", "fish", "cars", "tourists"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000236899.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518193, "question_id": "8HbC4ANAoGgn5Crirc7qWv", "question": "Where is the most likely location of this bathroom?", "choices": ["canada", "england", "asia", "united states"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000518193.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125221, "question_id": "8JoisoRPvKxvFvQvTDx3SL", "question": "What has the Nintendo Wii console been modified to control?", "choices": ["speaker volume", "mouse cursor", "power button", "keyboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552653, "question_id": "8JpnifjroCxDBztPRKSoKY", "question": "The cartoon character on the floor mat depicts what type of animal?", "choices": ["dinosaur", "turtle", "crocodile", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563383, "question_id": "8Knm6eSMS3ujWiiSLrseEQ", "question": "What is the man above?", "choices": ["street", "water", "building", "horse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563383.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424067, "question_id": "8Kv7FiWzGAUqb868sX7Hyg", "question": "What are the circular metal rings for?", "choices": ["hiding money", "moving water", "lifting tub", "opening bottles"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000424067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333273, "question_id": "8L6jwCSAvpmBdCDmUcX44B", "question": "Who is holding the leash?", "choices": ["owner", "surfer", "dog", "sunbathers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333273.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283626, "question_id": "8LPpqQAGGUbHZFXopweBPf", "question": "It is just before what time according to the clock?", "choices": ["700", "1200", "900", "500"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321477, "question_id": "8Lca4JgEGka6nfRTjZvwWr", "question": "What is the zebra doing?", "choices": ["grazing", "leaving", "hiding", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321477.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351492, "question_id": "8NhT3cNjjL23qKxzK4PPRr", "question": "Which street has a masculine name?", "choices": ["neither", "right", "both", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351492.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434601, "question_id": "8Pq3wfUtMrnyWXrwm6Kvbr", "question": "Where is this movie playing?", "choices": ["airplane", "car", "boat", "train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92263, "question_id": "8R2KzxeekuSpk5RfPeTq8Q", "question": "What does it look like this person is doing?", "choices": ["macarena", "jitterbug", "back handspring", "crane kick"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000092263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414818, "question_id": "8RHFtVtibgGApErgCmFX5R", "question": "Which one of these might have come in the box below the desk?", "choices": ["vases", "lamps", "rugs", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414818.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 408726, "question_id": "8RJox8ftvnmkcjEGAcGu3F", "question": "Unlike most cakes this one has no what?", "choices": ["roses", "sprinkles", "tiers", "icing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000408726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45107, "question_id": "8Rp8cLYRexmLmxBcfPaZzM", "question": "How many types of bathing are possible here?", "choices": ["zero", "five", "one", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045107.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435693, "question_id": "8RuB9WhSe2iSe22W7s8yhd", "question": "Which direction is the light coming from?", "choices": ["right", "left", "up", "down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435693.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257623, "question_id": "8Rz5LvsfVur6oxkhQc35bh", "question": "At least how many other people are with this lady at the park today?", "choices": ["one", "20", "none", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257623.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162323, "question_id": "8SBRNjYm2LdqguQoeJ6wjq", "question": "What is something this animal usually eats?", "choices": ["dolphins", "worms", "monkeys", "cows"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162323.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581585, "question_id": "8SUw5hNmmn8TNWNz8Zu2vq", "question": "What is needed for this activity?", "choices": ["ice", "rain", "snow", "waves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581585.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157189, "question_id": "8SqLyGwUbknL7x59eNzdZe", "question": "What food group is available?", "choices": ["fruit", "meat", "grains", "dairy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157189.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317117, "question_id": "8T7sxfNb3sVUei4iraB5yG", "question": "What is causing the giraffe to chew on the other giraffe's fir?", "choices": ["taste", "comedy", "bright color", "hunger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317117.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243162, "question_id": "8TNTrrqm6iRzzy5hwW4MT9", "question": "What is in the most danger?", "choices": ["cat", "bird", "sun", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309623, "question_id": "8WEDnQzZJRK2cuRfqNU53Q", "question": "What Royal House is deposed in Townsend's book?", "choices": ["avon", "windsor", "cambridge", "essex"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309623.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339300, "question_id": "8WaHNvtiwnT5h6K8rUfTKg", "question": "Who placed this animal in this basket?", "choices": ["it's owner", "humane society", "villain", "nobody"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230942, "question_id": "8WkCkSP97qvQerEGgTPnJ4", "question": "Based on its headdress the chair should be considered a what?", "choices": ["toilet", "futon", "throne", "stool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230942.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124108, "question_id": "8WwYxt2VY6MHSn28qgdQAj", "question": "What is the bird looking for?", "choices": ["rodents", "bugs", "worms", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124108.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517514, "question_id": "8XHfKeAy2f4RLQt6nC5xHy", "question": "What type of of animals are shown?", "choices": ["tropical", "arctic", "farm", "domestic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195789, "question_id": "8XJ5SuZ8iE73ZLdBXRUQyq", "question": "What is drawn on the Stop sign?", "choices": ["flower", "web", "egg", "eyes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195789.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 23220, "question_id": "8XSKLi7ZC8gELpUpcEMK9x", "question": "What skateboard trick is the man performing?", "choices": ["720", "grind", "ollie", "kickflip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000023220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 274798, "question_id": "8YRkectTdwT3SDKGypz5bD", "question": "What are the purplish vegetables called?", "choices": ["turnips", "eggplant", "red onions", "beets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000274798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197761, "question_id": "8Z4A5CmRYdRHMaDzsJvyVA", "question": "Which republic flies this flag?", "choices": ["france", "liberia", "georgia", "china"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197761.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352825, "question_id": "8ZVCFxqrRUwcUxqFXgRhjj", "question": "Who took that photograph?", "choices": ["annie leibovitz", "carey haider", "robert frank", "man ray"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352825.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349422, "question_id": "8ZpQyn4MdaXehRvCvR3gA4", "question": "What happens if Scarico pushes the button?", "choices": ["voting", "flush", "hot air", "camera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349422.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173337, "question_id": "8bzzkSPvqdUipSZmeQcvJP", "question": "What substance shown here is not usually served with French fries?", "choices": ["carrots", "mayo", "ketchup", "hot dogs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173337.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 132177, "question_id": "8cMF4yCGmuJUgDKPwx9iQb", "question": "Where is this child located here?", "choices": ["home", "prison", "hospital", "rest stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000132177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512628, "question_id": "8crgqMAT3Wi6JXm3Q3BztY", "question": "What does the giraffe on the right seem to be doing?", "choices": ["eating", "sleeping", "mating", "bathing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512628.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 307127, "question_id": "8dnGHHXr3oVHkwzbSZz4JF", "question": "Which animals depend upon this body of water for their water needs?", "choices": ["cows", "none", "sharks", "whales"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000307127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98313, "question_id": "8dxaP8zJ6fq9pasnqecdUJ", "question": "What is needed for this activity?", "choices": ["wind", "ball", "waves", "court"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098313.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364163, "question_id": "8e8Js2bnGGjkYjPxdoDBmg", "question": "Why is the man using an umbrella?", "choices": ["rain", "disguise", "sun", "snow"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000364163.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189977, "question_id": "8exacYESvjcwLQbRpM7693", "question": "What type of animals are shown?", "choices": ["wild", "domestic", "aviary", "reptile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100997, "question_id": "8fBQTf5nEnhsiaqhmGyzvY", "question": "What kind of people are served at this building?", "choices": ["veterans", "sick children", "sick adults", "drug addicts"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000100997.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302226, "question_id": "8fEn3Ap5Tkb6UjerRfk9U3", "question": "What is another name for the brush glove?", "choices": ["dog glove", "mitt", "baseball glove", "paper"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000302226.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137784, "question_id": "8h4d2cYyBDZZTNnMgF6eY8", "question": "To gain access to water here which direction should one turn the cap?", "choices": ["clockwise", "up", "counter clockwise", "in"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222164, "question_id": "8hFp84WZYHvRh9fctZ6iPg", "question": "What type of magazine is on the counter?", "choices": ["tabloid", "sports", "gardening", "gaming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222164.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27924, "question_id": "8hWK7my6L2A9BPUBqVjrHC", "question": "What are these vases normally used as?", "choices": ["painting", "snow globes", "water balloons", "light bulbs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000027924.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409846, "question_id": "8hWxgKnSyPQEeHeaTLVSaw", "question": "What finger is this man using to scroll?", "choices": ["ring", "thumb", "pinky", "index"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409846.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 134720, "question_id": "8heSZFKFiSsVGjCiqZWX7x", "question": "What is are athletes doing this sport trying to achieve?", "choices": ["speed", "trick points", "rotation", "height"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000134720.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283065, "question_id": "8jp2Pej5C5Fm7ZYKu4mZiW", "question": "The word that is highest up on the subway sounds closest to the name of what Witcher character?", "choices": ["renfri", "ciri", "dandelion", "eist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 332118, "question_id": "8jzFqhmqg3SNPGVfXQGEEY", "question": "What material is the two pans made of?", "choices": ["aluminum", "cast iron", "steel", "tin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000332118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501676, "question_id": "8kbzp7KJWtcsr33KFzu6ow", "question": "What is the man wearing a red shirt ready to do?", "choices": ["catch frisbee", "bike", "dance", "run"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501676.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175396, "question_id": "8kvLiAJoZPTEaAt9YRKuEz", "question": "What pastime can someone enjoy here while laying in their bed?", "choices": ["singing", "tennis", "basketball", "tv viewing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438062, "question_id": "8m9mpG8YMDB3Gx2xNNAMar", "question": "What is the longest word on the sign?", "choices": ["three", "proceed", "loading", "stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362501, "question_id": "8mSUwEFLQfAgnbdvH4TKCd", "question": "Which vehicle would win if the race was ended this second?", "choices": ["car", "airplane", "motorcycle", "carriage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362501.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227921, "question_id": "8nq6yZ4dm9LqxUtG3aRBgq", "question": "The plane on the runway carries what as its cargo?", "choices": ["medical supplies", "passengers", "military supplies", "mail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000227921.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161791, "question_id": "8oYTrkUwdUUyQgDcAK85VZ", "question": "What type of animals are walking on the dirt?", "choices": ["cow", "camel", "zebra", "horse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161791.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125025, "question_id": "8piR8KZMNZcL9FKtBtYCmG", "question": "What item is usually kept in this room?", "choices": ["cake", "paycheck", "toothpaste", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125025.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499231, "question_id": "8qGJ34bHvTS6YNYYuNcbdp", "question": "What structure is on the wooden panels on the floor?", "choices": ["statue", "wheelbarrow", "box", "bench"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499231.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452423, "question_id": "8qjVvKczupHcYaS5s9ERJr", "question": "What type of food is shown besides the vegetables?", "choices": ["cheese", "fish", "pasta", "meat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163097, "question_id": "8qsFpkSx3TwPUe9LQbY93z", "question": "What is the color of the sticker that is in the middle of the top part of the refrigerator?", "choices": ["yellow", "black", "purple", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163097.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450646, "question_id": "8ri5nUMK4aAZMzbaho4KcV", "question": "Which animals have hooves here?", "choices": ["cow", "zebra", "rhino", "horse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000450646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16835, "question_id": "8sWm6pLgPwErxwBGB7RvDB", "question": "This airline was absorbed into what other airline?", "choices": ["united airlines", "air france", "canadian airlines", "delta airlines"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000016835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226025, "question_id": "8sb2egipSTRCjNQMSCERF7", "question": "What type of enclosure is seen?", "choices": ["fence", "gate", "cage", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226025.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198664, "question_id": "8sgMEFmqTsp9a8EqfdMv9V", "question": "What direction is the arrow pointing on the yellow sign?", "choices": ["up", "left", "diagonal", "down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309837, "question_id": "8swFzTB3KtdrQk8Eubb9yi", "question": "What illness can this person get if they do this movement too often?", "choices": ["scoliosis", "diabetes", "plantar fasciitis", "carpal tunnel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281240, "question_id": "8tZWhEJWjdPmPBHbm28YD8", "question": "What number is the green toy closest to?", "choices": ["eight", "one", "two", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281240.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111444, "question_id": "8uGCydmPBquqz5nEXBDUzt", "question": "What kind of pattern is on the vest of the dog?", "choices": ["tartan", "gingham", "argyle", "plaid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111444.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15713, "question_id": "8uMeoif989moEQ5iGKhx83", "question": "What is the function of the object on the woman's right wrist?", "choices": ["computer", "time", "sweatband", "fitness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015713.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3208, "question_id": "8upiqFeSpz7LV9Nd8cf4Fr", "question": "What was this aircraft built for?", "choices": ["space travel", "commercial flights", "display only", "military service"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476378, "question_id": "8vqSJRaXNpnYxKHtboXYyq", "question": "In which direction to shore does this person surf?", "choices": ["perpendicular", "parallel", "over", "underneath"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476378.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196667, "question_id": "8wZ5b3DmUn4BTfHF6knCJ8", "question": "What is near the back of the plane?", "choices": ["garbage bins", "bears", "orange cones", "people"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000196667.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294464, "question_id": "8x3EQaPKCAzpKAHRe5Zekk", "question": "What is the user of the laptop doing?", "choices": ["reading news", "replying email", "online learning", "online shopping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448305, "question_id": "8x77xV9WwcHhCNfPLQ7SEU", "question": "What kind of animals are shown?", "choices": ["domestic", "aquatic", "wild", "reptile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448305.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7339, "question_id": "8xG3HDKf7wd9iSWi9a7SLS", "question": "What type of vehicle is the car parked next to?", "choices": ["ambulance", "fire truck", "taxi", "school bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167076, "question_id": "8xSxaJuyvp63SKXYEjpduT", "question": "Where is this person coming from?", "choices": ["house", "car", "metro", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000167076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420252, "question_id": "8ygKdsWV7Hmnnj52qGATY7", "question": "Which giraffe would be most likely to be surprised by the photographer?", "choices": ["left", "middle", "none", "right"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000420252.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460489, "question_id": "93iB3ezHu9FZ3aNZ6K29pw", "question": "In this ship the orange color indicates what?", "choices": ["rescue", "none", "abandon", "safety"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000460489.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 238362, "question_id": "94k2J3Yr78vNbe6Ws37bZH", "question": "What will these meats be put on for serving?", "choices": ["glasses", "buns", "toothpicks", "leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000238362.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92150, "question_id": "94r4iQAEJ4wxpgZeHYiMKx", "question": "What is between the fences?", "choices": ["trees", "houses", "rocks", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326942, "question_id": "968FZNVAt23y98dJcpsN5z", "question": "What does the sign pictured above mean?", "choices": ["yield", "no entry", "no way", "give way"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326942.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297128, "question_id": "96jsfjaVewEutS66GoFZ99", "question": "What is the condition of this animal?", "choices": ["flying", "robust", "sickly", "swimming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385883, "question_id": "96rTt5oSxU9gC8HNYHyMJr", "question": "The large animal seen here is meant to look like an animal that habitats where?", "choices": ["equator", "north pole", "mountains", "deserts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385883.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443267, "question_id": "99VPfSi9P7RiwCbeLhxe6y", "question": "The bus is the same color as what character?", "choices": ["charlie brown", "snoopy", "garfield", "smurf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470171, "question_id": "99WUPUyjbWKEsjw5rsaYL3", "question": "What is the cat doing?", "choices": ["eating fish", "dipping paw", "running", "hunting mice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470171.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89371, "question_id": "99eZkKLojwRfvQGw54g9fZ", "question": "What feature do these animals have?", "choices": ["antlers", "stingers", "wool", "tusks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177760, "question_id": "9B5FuE5MsUUmGkXrCh6Y6w", "question": "What is this type of bird food often called?", "choices": ["soup", "nectar", "syrup", "seed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416415, "question_id": "9BwwnkfZ6FLYsGor6P7mqB", "question": "What is the circular area on the large tower used for?", "choices": ["landing planes", "telling time", "skydiving", "target practice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000416415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 375984, "question_id": "9CFQZsW9kD5a7znWSw6y2n", "question": "Where is this laptop being operated?", "choices": ["office", "dorm", "cafe", "home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000375984.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22042, "question_id": "9CQgaQhc4vjSDhdQQqi5Qo", "question": "How has this sandwich been prepared?", "choices": ["sliced", "cubed", "diced", "shredded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022042.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158503, "question_id": "9DjkxeSWGPLFRzfYMm4USE", "question": "What is to the left of the window?", "choices": ["bed", "door", "light", "chair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158503.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39213, "question_id": "9DkvZJAAfKC7sC3nigLzia", "question": "What type of weather is here?", "choices": ["partly cloudy", "cloudy", "sunny", "mostly cloudy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000039213.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415688, "question_id": "9E2qtAVn7tBTiA6Nqd3GUt", "question": "What appears to be missing from the bathroom that could prevent a flood?", "choices": ["scrub brush", "hand soap", "plunger", "hand towel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415688.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252268, "question_id": "9EBCujwxTXSqNAfEbuqByB", "question": "Which item on this pizza should be spread out more for an ideal taste?", "choices": ["herbs", "tomato sauce", "meat", "cheese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252268.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532528, "question_id": "9F6iZ78SqDFndTqVddeVNA", "question": "What resembles the design on the wall?", "choices": ["pear", "chessboard", "bat", "apple"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000532528.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490241, "question_id": "9FJuvnwnQfUgqyUqULgiVu", "question": "What phenomena might frequently obscure vision when driving Ocean St?", "choices": ["lightning", "snow", "gulls", "fog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319149, "question_id": "9GqzQ8Fv2DfVQ6WW7rNVSZ", "question": "What is the largest object used for?", "choices": ["closet", "storage", "refrigeration", "microwave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319149.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75781, "question_id": "9J6PHiYk2ezdrYofTYWBNZ", "question": "What type of transport is important to the wearer of this tie?", "choices": ["sailing", "cycling", "rowing", "car racing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503708, "question_id": "9KGXs8GbYtWmsvGjBTDusq", "question": "What are these elephants ready to do?", "choices": ["eat", "hide", "drink", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356001, "question_id": "9KTuPuuseiRBs9r7WfHYDM", "question": "What is the disassembled device on the table called?", "choices": ["shaver", "hair trimmer", "electric toothbrush", "lamp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356001.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467005, "question_id": "9LADUTwBVqurqAm29gZopo", "question": "What is logically missing in this photo?", "choices": ["dog", "bike", "car", "person"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467005.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6128, "question_id": "9LYeuDdciECXyfaHemxwgD", "question": "Why is the Royal Ontario Museum sign have a blue background?", "choices": ["warning", "tourist information", "regulatory", "guidance"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000006128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 423387, "question_id": "9NA9GyyjhhLmaSbGoxFtsy", "question": "Which color indicates danger?", "choices": ["red", "yellow", "green", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000423387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567468, "question_id": "9NTxmdPQAfaZZ4ZNB6nuN3", "question": "Which one of these cities would the graffiti artist hate to live in?", "choices": ["jerusalem", "san jose", "toronto", "sydney"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000567468.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314746, "question_id": "9NUdgiBetWUkwwUqVZWExf", "question": "What does the object on the cat's neck commonly pair well with?", "choices": ["bird", "dog", "jacket", "dress shirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314746.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81769, "question_id": "9PAUwjFVxiFoog8S2xQZFM", "question": "In front of what object are the animals currently?", "choices": ["house", "building", "mall", "skyscraper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102524, "question_id": "9PC7oEPxA3CEsbJwnrZvuQ", "question": "What color benches and tables might someone using this park for a picnic sit at?", "choices": ["white", "blue", "brown", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365335, "question_id": "9QCvwrszMhCgPGJuBWkgHG", "question": "What is the couple currently engaged in?", "choices": ["swimming", "running", "skiing", "surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365335.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252690, "question_id": "9Qn5kbZqgiDZmVUHM5VowT", "question": "What is keeping the giraffes from escaping?", "choices": ["water", "wooden fence", "grass", "each other"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252690.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100214, "question_id": "9RATnhDsvU7VBWy5HmFtA8", "question": "What was the last name of the original formulator of this beverage?", "choices": ["smith", "mclaughlin", "mccarthy", "o'reilly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100214.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357295, "question_id": "9RTZJs7wqiSxT78kgJC4kT", "question": "What is the snow structure supposed to simulate?", "choices": ["path", "half pipe", "ledge", "stairs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357295.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 284619, "question_id": "9RnD9E9qKrz2Lm3F8RUoJm", "question": "Where are these zebras located?", "choices": ["wild", "circus", "zoo", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000284619.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34943, "question_id": "9RyfqfWsZ5KYwYTQZyZbKz", "question": "What part of this persons body is likely to next hit the ground?", "choices": ["foot", "elbow", "head", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000034943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123440, "question_id": "9SPb9n3k6qdUNUuJX3KiHC", "question": "What is the sponsor of this event that starts with an F?", "choices": ["fila", "algida", "french", "free"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123440.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160024, "question_id": "9SiKfFSLAwkDDDRGProMdR", "question": "Where is this pie going?", "choices": ["trash", "dish", "in oven", "to table"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160024.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293203, "question_id": "9TVFU2kN7VmEjBdkaza726", "question": "Why is the swimsuit the man is wearing shiny and glossy?", "choices": ["protective coating", "lubrication", "wet", "aesthetics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293203.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9560, "question_id": "9Tyek4dBEbYPfNsysqbkeg", "question": "What structure is behind the vehicle?", "choices": ["bridge", "control tower", "skyscraper", "house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009560.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444398, "question_id": "9Uvy7Eh8rP9vw9xp58LSWy", "question": "What are they holding with their hands?", "choices": ["poles", "bats", "racquets", "paddles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444398.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 480378, "question_id": "9VSwHCDb3sBaDMT2cCs4yJ", "question": "What is this fixture used for?", "choices": ["catch", "paint", "charge", "clean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000480378.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49816, "question_id": "9Vcd7kTMRjVjgs6jqL57ro", "question": "Why is the elephant standing near the water?", "choices": ["hot", "thirsty", "lost", "dirty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049816.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293105, "question_id": "9WHVhyfXTZVysViEtdpuuE", "question": "Which direction is this person facing?", "choices": ["downward", "toward land", "toward ship", "upward"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293105.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488669, "question_id": "9WVMcY8LNU3tC752R7Vfno", "question": "What kind of food is in the nice glass platter?", "choices": ["quiche", "lasagna", "pizza", "tortilla"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488669.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519771, "question_id": "9WqGHBuRK3vcLbfm9CRT7m", "question": "What type of numbers are on this dial?", "choices": ["fractions", "decimals", "whole", "roman"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000519771.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91679, "question_id": "9XSELnaT8eoFqjTqn3xKCu", "question": "What feature does this animal have?", "choices": ["wings", "stinger", "whiskers", "fins"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091679.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104447, "question_id": "9Y9S62wXQyP6sjM7JGXhuZ", "question": "What material is the object made out of that is being used as a coffee table?", "choices": ["metal", "leather", "plastic", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339780, "question_id": "9YCMtc3QSNTW4iRNNUmVww", "question": "What would be the easiest way to disrupt this cats slumber?", "choices": ["switch lights", "start crying", "run water", "press handsoap"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339780.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513846, "question_id": "9YQnjcCeS9kJhetmHNMm8G", "question": "Persons here are able to dim ambient light during the day how?", "choices": ["stained glass", "venetian blinds", "no way", "curtains"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000513846.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270276, "question_id": "9Ywq3tsgkzvffuwJh2uLPV", "question": "What is the name of the father in this animated series?", "choices": ["larry", "homer", "peter", "stewie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000270276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103809, "question_id": "9Z8eXFvHrwzjiREQGjvAQC", "question": "What family operates this company?", "choices": ["lawrence", "smith", "hilton", "barnard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463957, "question_id": "9ZY6PvydLbTWsNgiagCrZE", "question": "This outfit is ready for someone's what?", "choices": ["wedding day", "funeral", "bar mitzvah", "baptism"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463957.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439466, "question_id": "9a79DPyKjGXum8BC2DT8fB", "question": "What made the holes in the sand?", "choices": ["buildings", "monsters", "animals", "feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468316, "question_id": "9bJq2A7rRU5pudM5cuwBTH", "question": "What musical talent are the characters portrayed here famed for?", "choices": ["drum playing", "nothing", "singing", "fifing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468316.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294820, "question_id": "9bmY26aFyLqqo5JoCADSQV", "question": "What type of street is Cedar Meadow?", "choices": ["road", "avenue", "boulevard", "lane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294820.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252697, "question_id": "9cghif4rapQiSbsqEBuWsH", "question": "What crop is this plane flying over?", "choices": ["corn", "wheat", "oats", "rice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259397, "question_id": "9f4JNJzFJB34o7aPkR3uX4", "question": "Why is the man in red wearing a helmet?", "choices": ["halloween", "punishment", "style", "protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259397.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318387, "question_id": "9i78a9hq2Qyb8NdoxwwZeG", "question": "What has been done to the outside of the yellow train car?", "choices": ["wood work", "scuffing", "carving", "vandalism"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476640, "question_id": "9jWG4krg4k4q4kpj7XfaRF", "question": "Where is this bathroom located?", "choices": ["courthouse", "office", "library", "residence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567211, "question_id": "9kJcks4gEvbYMam3HRvJBa", "question": "What kind of animal is on the TV?", "choices": ["badger", "beaver", "skunk", "otter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420513, "question_id": "9khRKRrBs8cjvqGhCgxPDk", "question": "What is this person most likely doing to the wood?", "choices": ["painting", "stripping", "staining", "sanding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420513.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 228067, "question_id": "9kxPrtLhpEHowRCfms4Mha", "question": "What type of vehicle is above the cars?", "choices": ["train", "bus", "monster truck", "airplane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000228067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461150, "question_id": "9mXS3sSXxnmvbssWd7b3Ph", "question": "What is required for this activity?", "choices": ["rain", "snow", "sun", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144412, "question_id": "9mhiTUpa9rf39yzE89gRCR", "question": "What type of area is shown?", "choices": ["deserted", "residential", "commercial", "industrial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000144412.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487152, "question_id": "9msLkmvBX78NVVCjjbDsjp", "question": "What holiday is being celebrate here?", "choices": ["christmas", "halloween", "easter", "new year's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487152.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281084, "question_id": "9nLpUKKSyYF2Cg3pWjfHun", "question": "What company makes the item the doll is holding?", "choices": ["mcdonald's", "oral b", "american express", "ibm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458038, "question_id": "9nbLJoweVHwuRHdTGt77xy", "question": "Who is the traffic signal for?", "choices": ["animals", "train", "pedestrians", "cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458038.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109660, "question_id": "9nyrsv8idx4kBCLQmGKX2p", "question": "The cat is more likely to create a letter using which implement?", "choices": ["keyboard", "claw", "ink pen", "felt pen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339224, "question_id": "9oGioftLkmLaVH85nyUYVK", "question": "What is the brand advertised in the back most well known for?", "choices": ["food", "cars", "technology", "movies"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000339224.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49922, "question_id": "9oHEZfm78ekecV9qZuUBhN", "question": "What is the cat sleeping on top of?", "choices": ["watermelon", "box", "refrigerator", "remote"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20209, "question_id": "9oJgDe3WQvULavPkccaFzP", "question": "When do we celebrate the holiday written on the back of this truck?", "choices": ["1st october", "1st january", "25th december", "26th december"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512197, "question_id": "9oQHaxAeTunQwT9aocmfD3", "question": "What is the most popular color of the fruit shown?", "choices": ["red", "green", "brown", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512197.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55162, "question_id": "9odcK8nFWwbo9XUA3EAVdj", "question": "What does the person have on?", "choices": ["crown", "necklace", "yoke", "helmet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470603, "question_id": "9okisa7r9JQUoBdL8w984u", "question": "What is in the gold-topped bottle?", "choices": ["bitters", "beer", "champagne", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470603.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98738, "question_id": "9osB6w4o5W3BWCVPJd9uiV", "question": "What letter is on the lowest row?", "choices": ["z", "d", "s", "w"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098738.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404682, "question_id": "9p3uUNVR9MUQnyREEWFjmm", "question": "What is the dog dressed up as?", "choices": ["princess", "king", "priest", "pauper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 425808, "question_id": "9pGLzFhxPZLbRqEjrVCpaV", "question": "What is stopping the hill from being one continuous white surface?", "choices": ["stones", "water", "poles", "trees"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000425808.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 42053, "question_id": "9pJsMjkJMDGEPAU6fVjBx7", "question": "What was used to make this bench?", "choices": ["steel", "wood", "iron", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000042053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542561, "question_id": "9poUzAxr4HeRQmyAtnP38x", "question": "What company makes the bus on the left?", "choices": ["zuffa", "mitsubishi", "volvo", "ford"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542561.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358261, "question_id": "9q7PJHUpH93TSCNrvtt4kb", "question": "Which one is least likely to stumble on an obstacle ahead?", "choices": ["front", "middle", "equal", "back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16211, "question_id": "9qFBu5dEAbDee26zPvwnyF", "question": "What is the main color in the bathroom?", "choices": ["red", "brown", "silver", "gold"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000016211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159920, "question_id": "9qJN9FmqBpPzXf5oa9ocDZ", "question": "What action is this man taking?", "choices": ["rolling", "falling", "descending", "ascending"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159920.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205235, "question_id": "9qfCbv2qQ3WWMhcKwXo5Y4", "question": "Victoria Railway station is located in?", "choices": ["mexico", "sydney", "columbia", "london"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446081, "question_id": "9rKm4TcQwqhKdv7QGAyQQv", "question": "Where is the animal located?", "choices": ["city", "wild", "jungle", "enclosure"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446081.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411106, "question_id": "9uvuGuSmexF9K9qQvf3S6H", "question": "What helps this person to slow down?", "choices": ["wind", "paddle", "wave", "monkeys"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411106.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125300, "question_id": "9vKkbLLzar6TbULgfPgQpU", "question": "What is he doing to the elephant?", "choices": ["feeding", "scaring", "killing", "washing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 113682, "question_id": "9vPbNYR5KbtJAmDMvNVSrT", "question": "What might happen if the weather stays the same?", "choices": ["delays", "burns", "shoveling", "discounts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000113682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565626, "question_id": "9xKT6JCeaQ2T3i9FkCthoD", "question": "What wattage should a bedside lamp?", "choices": ["100 lumens", "300 lumens", "400 lumens", "500 lumens"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336000, "question_id": "9xUU4UyFtzuEPqSJHB6QxM", "question": "What is needed for this activity?", "choices": ["snow", "wind", "waves", "ice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336000.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345298, "question_id": "9xiXJLAuyGu7ZuRWwDeTRi", "question": "Which item seen here is something of a real animal?", "choices": ["bowtie", "bear", "shell", "cloth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345298.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401421, "question_id": "A226TJ4vgu2LSiyZVTB4c6", "question": "For whom does the persons wearing yellow here work?", "choices": ["airport", "police", "taxi company", "water company"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401421.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431017, "question_id": "A2hQPh4DuQbnx4jYCY6QZH", "question": "Foam and innerspring mattresses are some of the most popular choices for whom?", "choices": ["elders", "children", "babies", "adult"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431017.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387882, "question_id": "A3BgxnLp7nddTUXQWZDvuc", "question": "What other animal eats the yellow stuff in the picture?", "choices": ["dog", "horse", "pig", "elephant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387882.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334257, "question_id": "A3Y5uUy5mxnWfurckWfYDD", "question": "Which board game resembles the pattern on the side of the cake?", "choices": ["sorry", "connect four", "monopoly", "uno"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536062, "question_id": "A4Wmt5zU66U7MHxDJ2uhcU", "question": "What is on the other end of the ropes held by this man?", "choices": ["whale", "sail", "helicopter", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100557, "question_id": "A5EdmGTekTBUYhHmFTFubk", "question": "Which of these animals here are older?", "choices": ["top one", "right most", "left most", "neither"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468266, "question_id": "A5bteZRSCMmGmsgQ65n3v3", "question": "The animal in the foreground is what color?", "choices": ["orange", "gray", "yellow", "blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468266.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382779, "question_id": "A7XyoZpHAxTmzDXpkEwJFY", "question": "What is this food item usually found in?", "choices": ["blt sandwich", "pizza crust", "coffee", "banana split"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382779.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373885, "question_id": "A83FMYkHbhVdTtzexQBCWt", "question": "What are the birds next to?", "choices": ["snake", "goat", "dog", "cow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373885.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 216806, "question_id": "A86vXSQGc8vSVhwQj85Dez", "question": "How will this person finish this dish?", "choices": ["throw away", "bake it", "freeze it", "fry it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000216806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416762, "question_id": "AAaWbr5HNLDU5FJikDXQ4B", "question": "What is the likely location of these cats?", "choices": ["laundry room", "bedroom", "living room", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000416762.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97554, "question_id": "AAwwWPDVURFZhNFZUQ3kU5", "question": "What might alert owners that the black cat is nearing?", "choices": ["bites", "clomping", "bells", "odor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097554.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 489037, "question_id": "AB5nz8zJY4E4gc6Q44TCGZ", "question": "Why is someone lying on the bed?", "choices": ["is dead", "passed out", "is resting", "is hungry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000489037.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276679, "question_id": "AB9GBnfiWXzfdEEr82DnBA", "question": "Where do these animals typically sleep?", "choices": ["stream", "tree", "bed", "cave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276679.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354986, "question_id": "ABxyJvWjDysXAcVUKXBLU7", "question": "What is on the plate?", "choices": ["crumbs", "jello", "brownie", "salad"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354986.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539547, "question_id": "AC9iCbUCEgjyr8kNyupvrT", "question": "What type bird is the blue one seen here?", "choices": ["none", "purple martin", "blue jay", "blue bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539547.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381489, "question_id": "ACRSCvFaa2BfdZsosWnaQo", "question": "What are the decorated round boxes next to the man called?", "choices": ["suitcases", "knitting boxes", "hat boxes", "trunks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381489.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234802, "question_id": "ACd4FEon5qXWGQk9v2nNmH", "question": "What does the large structure in the back resemble?", "choices": ["boat", "mountain", "hammer", "clock tower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000234802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8663, "question_id": "ACkwrkWxSTKmzK26cuyrLy", "question": "What is the user of this cell phone currently doing?", "choices": ["texting", "radio listening", "talking", "charging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339760, "question_id": "ACmoFxohDjEeyNUeMsaoKU", "question": "The round items attached to the bottom of the box make it easier to do what?", "choices": ["push", "clean", "park", "lift"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66777, "question_id": "ADCMPte5jPQYQ8ShkdLGrp", "question": "Why are people taking pictures?", "choices": ["accident happening", "ski practice", "ski competition", "ski invention"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000066777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75329, "question_id": "ADhzBkpMM7BzkdEaJ7qVCR", "question": "What's the pointy part on the bird's head called?", "choices": ["crown", "beak", "nape", "tarsus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124066, "question_id": "AEfoeVbUWfHDjFR9oKG58m", "question": "This cake will have which one of these type of flavors?", "choices": ["red velvet", "berry", "citrus", "chocolate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551690, "question_id": "AGNFvjLKtmw6Rp2iS8jbyi", "question": "The laptop is being filmed with which type of camera lens?", "choices": ["telephoto", "fisheye", "macro", "wide angle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551690.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548740, "question_id": "AHEfGUEitChFZnsZxQ78yX", "question": "If the baby crawled out of the suitcase what surface would they be on?", "choices": ["vinyl flooring", "linoleum", "carpet", "tile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548740.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37064, "question_id": "AJ3Z5qgNpEe4fbUsNyjSyN", "question": "What vehicle stops regularly near the sign?", "choices": ["bus", "subway train", "carriage", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151682, "question_id": "AJGHpmaRRuRmGw9Cc9Mx9y", "question": "What is the bird sitting in?", "choices": ["lake", "electrical wire", "cup", "bird bath"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156220, "question_id": "AJojd6K8UHJzy6FnwMVofq", "question": "What movie had a setting similar to this one?", "choices": ["shallows", "star wars", "zombieland", "matrix"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8798, "question_id": "ALdNreFh8P2qgPzHH3ct78", "question": "What is on the floor near the cat?", "choices": ["antler", "troll", "slug", "teacup"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214548, "question_id": "ALtKRUHCJVT5UYANHorME9", "question": "What is the most likely reason for construction in this image?", "choices": ["new runway", "stadium", "basketball court", "ice skating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214548.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89994, "question_id": "AMCYh9gkek877jNLfwHfCu", "question": "What type of vegetation region is pictured above?", "choices": ["grassland", "tropical", "desert", "forest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089994.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366551, "question_id": "AMLXK5fQquFHLRQnNptjhW", "question": "If the train went off the tracks everyone would be in danger of doing what?", "choices": ["leaving", "paying", "drowning", "talking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 1685, "question_id": "AMsz6rs825hFWLTmjWj6EE", "question": "How many directions can someone in law enforcement uniform on an Equine travel on Boronia lane?", "choices": ["two", "none", "three", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000001685.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328871, "question_id": "ANDEZtHEfisHZC64GrTAsD", "question": "Which color handled scissors might create the most discomfort while using?", "choices": ["red", "black", "blue", "chrome"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000328871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553868, "question_id": "ANExXvSjZHFJ8QV3HKVCZp", "question": "What type beverage is being consumed here?", "choices": ["coffee", "espresso", "herb tea", "mint julep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553868.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291766, "question_id": "ANZ8XGMtakZqkrPG2pHuxN", "question": "How many people could eat from this one plate?", "choices": ["three", "two", "five", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73301, "question_id": "AQm8PocbXFjfFhNYvvPMnS", "question": "What type of organization owns this?", "choices": ["junior high", "elementary school", "high school", "university"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073301.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191070, "question_id": "ARQ8WivhSdpv7AZJGmXQ7y", "question": "Which word in this picture could be described as redundant?", "choices": ["driving", "stop", "ave", "bellevue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403199, "question_id": "ASCbTxk7Vv6ir7fBcCjBnk", "question": "What type of public entertainment center is advertising on the red double-decker bus?", "choices": ["movie theater", "stadium", "orchestra", "theater"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403199.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478130, "question_id": "ASDMsM6zA2g2ptGBb6bSPb", "question": "How did this person get to this position?", "choices": ["sat down", "flew", "pushed", "fell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478130.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500934, "question_id": "ASKXdAuzBrMZ99tGusaeVP", "question": "What is touching the surfboard?", "choices": ["hands", "cat", "bare feet", "cane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500934.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494685, "question_id": "ATdmrvBkky4xdg6d23GoE8", "question": "What is the woman holding?", "choices": ["basket", "umbrella", "weights", "baseball bat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494685.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456812, "question_id": "ATzn3R5jmg7eB6xKArxwoZ", "question": "What ball is being retrieved here?", "choices": ["globe", "soccer", "baseball", "tennis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456812.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160015, "question_id": "AUHmfLhs2jbXkMyVHxShJw", "question": "What is the typical exit point of this vehicle?", "choices": ["side door", "roof", "emergency door", "window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160015.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464407, "question_id": "AUxUYoLNqkY6GkEsuR8ZwS", "question": "What will be done to the prices of the items in the store to the right with the red sign?", "choices": ["no charge", "raised", "stay same", "lowered"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464407.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457853, "question_id": "AVPGEUBcNkWjB49yne7asy", "question": "What helps to prevent crime in this area?", "choices": ["police station", "car alarms", "street cam", "neighborhood watch"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000457853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51460, "question_id": "AVmFyBf2bsfHfNSFTJ5sWb", "question": "What is this person doing with the horse?", "choices": ["attacking", "riding", "racing", "performing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285724, "question_id": "AWLWsxXUnXTJ3W9myH8ZxG", "question": "What will that device need to play music?", "choices": ["antenna", "cd", "cassette", "record"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000285724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178736, "question_id": "AXDiUdgfu88AF4kbkQkVTD", "question": "What is the air temperature in the area surrounding the green bench that the woman is standing on?", "choices": ["cool", "warm", "chilly", "freezing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178736.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61283, "question_id": "AXL7mavFKv3gex95rD42rB", "question": "What is usually found in this room?", "choices": ["refrigerator", "toilet", "dining table", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473398, "question_id": "AY8qToQjkMgVsYpojpWDAA", "question": "How many years has this company been providing buses?", "choices": ["98", "97", "50", "125"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473398.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170625, "question_id": "AYXRnrs6WDYgN2HtUgKcHA", "question": "What type of transportation is shown?", "choices": ["rail", "water", "road", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170625.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218871, "question_id": "AZMC6egBjQZaiCTNMLzuTZ", "question": "What is the man riding in?", "choices": ["jet ski", "raft", "yacht", "canoe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520575, "question_id": "AZjvuzDf6zEHQZCWnCuqom", "question": "In which country does this bus run?", "choices": ["belize", "mexico", "england", "usa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520575.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362510, "question_id": "AZxqxisoLZGJWo3KpXUGYw", "question": "What time of the year is it likely to be?", "choices": ["summer-fall", "spring-summer", "winter-spring", "fall-winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362510.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63438, "question_id": "AaJZksFjzFUzyfL4BjXKZ5", "question": "What kind of feet does the animal have?", "choices": ["webbed", "five-toed", "horseshoe", "fins"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063438.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38349, "question_id": "Ab9Nd7RajDvShfCgMm59GE", "question": "What material is the brown item under the bird made of?", "choices": ["wood", "ceramic", "cement", "tiles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521522, "question_id": "Ac56mjvi4cz9Tretx5TRUd", "question": "What might be housed in upper chambers here?", "choices": ["files", "food", "bell", "closet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521522.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156220, "question_id": "Ac7RW2j5vZgsfZfjsFgfPS", "question": "Why is he wearing a suit?", "choices": ["dress code", "costume", "warmth", "uniform"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522753, "question_id": "AcFvAkefusgFnvDia7VT3M", "question": "What did the namesake of this street die of?", "choices": ["diabetes", "car accident", "cancer", "heart disease"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10279, "question_id": "AdEJHN5xUdpyJDK4Zi47hM", "question": "What is the first name this famous teddy bear is also known as?", "choices": ["eeyore", "tigger", "winnie", "mickey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010279.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487206, "question_id": "Ae5JEM2ttNTzCSsEYe557E", "question": "What is the brown stuff sticking out of the bun?", "choices": ["ham", "bacon", "onions", "hamburger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487206.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 67285, "question_id": "AerAEgYJDMWaHPyUrhyMBc", "question": "What European Capital is this astronomical clock located in?", "choices": ["berlin", "london", "belfast", "prague"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000067285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180904, "question_id": "AfnEsCV3mEGAExmMtSJSM7", "question": "Where were the orange scissors made?", "choices": ["usa", "england", "beijing", "hong kong"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168939, "question_id": "AgFbbrmyjVimYSyfdbn2AH", "question": "What does the lower sign mean?", "choices": ["turn left", "turn right", "go", "don't turn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168939.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581918, "question_id": "Aiv7YhKhuowRmZeygwxUDg", "question": "What kind of phone is being used?", "choices": ["cellular", "landline", "pay", "rotary"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581918.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330949, "question_id": "AixBRkRCfWqFUMWsMfmFFc", "question": "A bicycle rider is called?", "choices": ["racist", "cyclist", "driver", "motorist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330949.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447660, "question_id": "Aj8DwP2CJntx2SrwhTzfSo", "question": "What time is it?", "choices": ["nine fifteen", "six thirty", "six twenty-six", "five thirty-eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433768, "question_id": "AjiUmCZty9PY3rvdgfDYGL", "question": "Which animas seen here are carnivores?", "choices": ["giraffes", "none", "zebras", "gnu"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57711, "question_id": "AkDThAmWDgC8qvMTWoAvc9", "question": "Which item could be used to repair a eyeglasses?", "choices": ["all", "left", "middle", "right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000057711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169472, "question_id": "AkRmNuWr2gCGyW9tKP6Rim", "question": "Why are the bears so small?", "choices": ["children", "deformed", "new species", "malnourished"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000169472.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277863, "question_id": "Am4Hcw2gdSohE9naC9A56f", "question": "How many Cygnus are shown in the image?", "choices": ["four", "six", "seven", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339622, "question_id": "AmmENrD8NmcJBUHbob44yi", "question": "What is most likely in the blue jug?", "choices": ["jellyfish", "salad dressing", "fuel", "methane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339622.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386849, "question_id": "AnBPARwHt365dSPXEMCn3h", "question": "Why is the wooden bench reflected on the pavement in front of it?", "choices": ["fog", "recently cleaned", "sleet", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386849.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391832, "question_id": "AnVgcn4gAFEcRPLbnz3P9t", "question": "Which way is the person shown here likely to fall?", "choices": ["left", "none", "forward", "rightward"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391832.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298476, "question_id": "AngGgKSigJF23wM7Ldt9cQ", "question": "What are the men riding on?", "choices": ["surfboard", "scooter", "skateboard", "bike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574435, "question_id": "Ant7e39fpae3Vbb4W58C5b", "question": "What does these animals provide that required people to use shears for?", "choices": ["fragrance", "horns", "oil", "wool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000574435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 307047, "question_id": "AoCij62HZWq3McwKNSiDoW", "question": "The dog's nose appears to kiss the foot belonging to whom?", "choices": ["intruder", "passer by", "no one", "photographer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000307047.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547998, "question_id": "AoGwp4VzfxvbptUzJVTToD", "question": "What type of display technology does the television underneath the cat use?", "choices": ["plasma", "crt", "led", "lcd"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371020, "question_id": "AohKKDCJxdKY98pKsB2fwu", "question": "What does this animal like to feast on?", "choices": ["lizards", "hay", "tigers", "doves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371020.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104161, "question_id": "ApshQ5fc2AXCeRXzEhqJwg", "question": "Why are the red pillows there?", "choices": ["decoration", "comfort", "keep dry", "cleanliness"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000104161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29503, "question_id": "ArAi7W8ugpim5i8KBouJkN", "question": "What is the occupation of the person on the horse?", "choices": ["police officer", "fireman", "jockey", "cowboy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029503.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187794, "question_id": "ArqUgv7Pd9WeWjrX2s7Zfx", "question": "What is needed for the surf water to look that color to human eyes?", "choices": ["electricity", "light", "food coloring", "gravity"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000187794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319963, "question_id": "Arw4uoAMnuMc3y5k5dUKJs", "question": "What is the boy holding?", "choices": ["longboard", "surfboard", "shortboard", "bodyboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556174, "question_id": "AtaqEAstRckZu72tbtZx6Z", "question": "What is usually made in containers with this shape?", "choices": ["juice", "soup", "tea", "rice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556174.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20789, "question_id": "AtzhKkeyRvaMAaMB5yK6GZ", "question": "Why is there snow on the bench?", "choices": ["snow machine", "fell there", "placed there", "cold weather"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020789.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211187, "question_id": "Au9STqfwvoJkafi6LM8KZM", "question": "What type of hat is the person wearing?", "choices": ["baseball", "fedora", "top hat", "beanie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211187.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296339, "question_id": "AvGukDWH6dJjUkiAyfB4Vv", "question": "Who is in more danger?", "choices": ["seashells", "fish", "child", "man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158069, "question_id": "AvXpA5uDBqmzfUXNSUxDH4", "question": "What is the two story vehicle commonly called?", "choices": ["stage coach", "double play", "double dutch", "double decker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77666, "question_id": "AvsKd4gprpHFzw3kCSMQtp", "question": "Which one of these might create the pattern that is on the item below the pizza?", "choices": ["sculptor", "glass smith", "illustrator", "weaver"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000077666.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122297, "question_id": "AwQfzpGTsfDaPyjAUhbTyh", "question": "What can be said about the meteorological conditions here?", "choices": ["overcast", "partly cloudy", "mostly cloudy", "sunny"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000122297.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578777, "question_id": "Awtov3sGKtPATh9Q2jdhJ6", "question": "Where is this bear located?", "choices": ["field", "desert", "circus", "forest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490208, "question_id": "AxCih3omnt3gE65rPeinYQ", "question": "What is the train engine design based on?", "choices": ["meme", "comic strip", "cartoon", "child's book"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93270, "question_id": "AxUn5P5r6fFUgfgx6WqSpj", "question": "What item does this person have that could protect them from the rain?", "choices": ["parasol", "umbrella", "banana", "hood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093270.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136598, "question_id": "AznJJYHmkKvNrSeXbBAwDQ", "question": "What button would one press if one wanted to select the letter M?", "choices": ["six", "two", "five", "nine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136598.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362417, "question_id": "B23DxCyHPXUEMp4MCwBknZ", "question": "What is the air temperature in the area surrounding the fire hydrant?", "choices": ["cool", "mild", "chilly", "hot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362417.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85178, "question_id": "B2CfQbFzLBgto564cq6zB6", "question": "What silverware is missing to eat the salad?", "choices": ["spatula", "fork", "spoon", "knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085178.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8441, "question_id": "B2MKELyMsrjPDZigP3V8L6", "question": "What would be the most dangerous part of this animal to a child human?", "choices": ["horns", "tail", "mouth", "udder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008441.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37930, "question_id": "B2RRPaZXUQNXLvtLAK2otj", "question": "Why would someone sit at this table?", "choices": ["to eat", "to paint", "to work", "to speak"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037930.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468718, "question_id": "B2jQWurLoxoEpg9akiBfEu", "question": "What item is most likely to be sold in the store?", "choices": ["ammunition", "fruit", "wood", "staple guns"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468718.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98254, "question_id": "B3qxcRxPsitFSGFPSQqJAN", "question": "What part of his body is the silver object protecting?", "choices": ["elbow", "ankle", "shoulder", "head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098254.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40387, "question_id": "B5fR5EvVEpAuxJsxhmwSmN", "question": "This size photo is perfect to display in what?", "choices": ["wall", "wallet", "billboard", "album"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105130, "question_id": "B5s8xZPjM76ad8dzBWuins", "question": "Its parent company merged with what other airline?", "choices": ["british", "delta", "american", "virgin"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000105130.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543029, "question_id": "B6QKeddbbmYX5vNvRiGZ2n", "question": "What type of dog is swimming in the water?", "choices": ["rottweiler", "golden retriever", "corgi", "siberian husky"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543029.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171760, "question_id": "B6z5UwF5TUqXj3HnSREVPv", "question": "What does the person all the way to the left have?", "choices": ["skis", "swords", "cows", "bells"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439318, "question_id": "B875ehCjRExMysjz83PKF8", "question": "Why does he have his left hand in that position?", "choices": ["arm hurts", "block sun", "waving", "refusing picture"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000439318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457051, "question_id": "B8SNRbQc5qM4DyJpoUiNcu", "question": "What hits the ball over the net in this game?", "choices": ["paddle", "racket", "paper", "hand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566322, "question_id": "BA7833CDhpbZKkfWMgNQR9", "question": "What would be located at the base of this clock tower?", "choices": ["train station", "airport", "grocery store", "shopping center"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566322.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84543, "question_id": "BAFbr3pNzbFgA2SAznXMfi", "question": "What part of the dog is resting on the jeans?", "choices": ["ear", "eye", "tail", "paw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084543.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177615, "question_id": "BALkf5Ng4LcLSWr6NVmxEN", "question": "Which country is known for having these types of busses?", "choices": ["netherlands", "germany", "united kingdom", "japan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177615.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434649, "question_id": "BAXYU92UuPXEjJvtMiHhhB", "question": "A message can be sent using how many of the items on the table?", "choices": ["zero", "nine", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52301, "question_id": "BAbViu68JBAfj5j9fbH8aX", "question": "What is an alternative name for this type of registration?", "choices": ["tail number", "flight number", "call number", "license number"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000052301.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472805, "question_id": "BAo8eqwNpDAKrm6Jo9FXmQ", "question": "What kind of vehicle would connect to this object?", "choices": ["rv", "bus", "police cruiser", "firetruck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472805.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556427, "question_id": "BAvN7vbbj9f3M5MCcBsjCA", "question": "What type of transportation is shown?", "choices": ["water", "land", "rail", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366228, "question_id": "BBJUdJN39jUcCHWgWCej5g", "question": "What does this ride on?", "choices": ["roads", "air currents", "rails", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301964, "question_id": "BBiz2XftuKmQiGGtfCyJeX", "question": "Standing in this manner maximizes what in this pair of giraffes?", "choices": ["visibility", "sleep", "dance moves", "balance"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000301964.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178101, "question_id": "BBomHmYnmZU9pKcGfnMeQi", "question": "Where did the elephant get what is in its mouth?", "choices": ["watering hole", "tree", "bush", "ground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178101.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406003, "question_id": "BCAMScAguEF5KNcCwXoLFX", "question": "Sporting item that is generally made of injection-molded plastic and roughly 8 to 10 inches is?", "choices": ["frisbee", "bat", "shuttle", "ball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000406003.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92655, "question_id": "BCXy3y8uCjd5euZwWYYkW8", "question": "The red and white structure is used to warn who?", "choices": ["sailors", "riders", "trespassers", "children"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092655.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276, "question_id": "BDDZvYAHXFMnDuD9xct5UB", "question": "What kind of building is shown?", "choices": ["courthouse", "barn", "church", "fire station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25420, "question_id": "BDVLcsVqNfTdh6CCds8BAb", "question": "What is being limited by this sign?", "choices": ["yielding", "parking", "sitting", "stopping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025420.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65107, "question_id": "BDpMEaiURy9hhUGKc5izUs", "question": "What are the zebras walking on?", "choices": ["grass", "concrete", "wood", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065107.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172605, "question_id": "BE3i78d534EtX5Afm5q5Js", "question": "These boys set up camp where?", "choices": ["indoors", "national park", "back yard", "kodiak island"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172605.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306565, "question_id": "BF2KBtzMkCWHRz2Qnxdknc", "question": "How many hands are raised above heads here?", "choices": ["two", "one", "four", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350456, "question_id": "BFht9hyuzUadyioXcN24Bi", "question": "In flight which part provides fluid motion enables accurate command on differential braking?", "choices": ["wings", "fin", "rudder", "elevator"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350456.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424004, "question_id": "BGK7mEaD3fE4RD25n5rK6o", "question": "What type of shot is the woman about to hit?", "choices": ["backhand", "serve", "slice", "forehand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424004.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326579, "question_id": "BGS7VNYXZevPvLvfzzhZ8y", "question": "In what year were television remotes invented?", "choices": ["1934", "1893", "1955", "1892"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384876, "question_id": "BGuxKoRLDC5CNpefFsAjVQ", "question": "What would be the military time if it is evening?", "choices": ["530", "1825", "1730", "1630"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384876.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 166246, "question_id": "BH52KJiPqFUSQWMjXZdJQU", "question": "What is this man performing?", "choices": ["dance move", "skateboarding trick", "gang initiation", "song"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000166246.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417728, "question_id": "BHZ56bNLbgw8UnkdehHEcd", "question": "What purpose does the front plate provide?", "choices": ["identification", "calculator", "car price", "location"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417728.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488091, "question_id": "BJWzuZfuhUc3jiS27Qjgsh", "question": "What does the above room represent?", "choices": ["bedroom", "kitchen", "toilet", "store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488091.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382685, "question_id": "BJwxWhWmyMa7E6iHUqFR2c", "question": "What can one rent from this bus?", "choices": ["bike", "book", "car", "tuxedo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382685.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212515, "question_id": "BJyekFetLfctJbPN2uayvy", "question": "What type of skateboard trick is the boy performing?", "choices": ["nollie", "grind", "ollie", "kickflip"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000212515.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524185, "question_id": "BKTQnVaEoALDWNSnd6bZkV", "question": "The big hand is closest to what number?", "choices": ["six", "nine", "eight", "seven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524185.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170447, "question_id": "BLsqsDYt2Lut5iGAD4AyJn", "question": "What type of phone is she using?", "choices": ["rotary", "cellular", "landline", "corded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160434, "question_id": "BMLuxg345bgQJDFiy4UZc6", "question": "What is causing the different color combinations in the photo?", "choices": ["waves", "filter", "sun", "lighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160434.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85955, "question_id": "BMZKEtHcRCLbUgZGGF6LGJ", "question": "What is the woman wearing?", "choices": ["winter clothing", "casual outfit", "bathing suit", "business attire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085955.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178428, "question_id": "BMZmVk3s3ZdkmEbh4Yj53p", "question": "What is helping the surfer from losing his board?", "choices": ["dog", "river", "gravity", "ankle leash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178428.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 239548, "question_id": "BNePsoCARAtHmBLZ8Xw4EM", "question": "What is Ben Stidham?", "choices": ["artist", "angry", "cop", "father"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000239548.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410867, "question_id": "BPJLTpKwhispTgjcHXhbTq", "question": "What is the person wearing on their head?", "choices": ["swim cap", "fedora", "bucket hat", "football helmet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410867.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262291, "question_id": "BPXR89tebKxvRjhmcDsL6i", "question": "What is the shape of the red sign the man is holding?", "choices": ["octagon", "pentagon", "circle", "hexagon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262291.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339015, "question_id": "BPzBNaEex7XTuwNKUzpQRT", "question": "What human like quality might this bird posses or be trained to possess?", "choices": ["speech", "avoidance", "flight", "betting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339015.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 72722, "question_id": "BQUi5VcosWkFcZFtDXYpjH", "question": "Which decal seen here was placed by an official government worker to be installed officially here?", "choices": ["none", "earthly soul", "flag decal", "pimp it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000072722.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 392647, "question_id": "BQzrMfjfsJiLgQFSGPQ9wF", "question": "What is the kind of sound does this animal produce?", "choices": ["neighs", "purrs", "chatters", "barks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000392647.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349572, "question_id": "BR9a657YPPABW25tr68vPF", "question": "A protective device against intense sunlight is called what?", "choices": ["cap", "umbrella", "hat", "parasol"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349572.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368147, "question_id": "BSGy8JPvDrF4vnc8oD2KTN", "question": "What is this player preparing to do?", "choices": ["bat", "catch", "short stop", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368147.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391719, "question_id": "BSPzDGBv7gdnofyGTHcb9n", "question": "What type of events are held in the building behind the stop sign?", "choices": ["hockey games", "football games", "basketball games", "auto racing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000391719.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 88268, "question_id": "BSWmGGZaKSinwHDkezBkMq", "question": "This equipment used to play which game?", "choices": ["flying disc", "cricket", "skating", "baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000088268.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512556, "question_id": "BT5hk2V3WBzsZZi3rJm72K", "question": "What is the red object meant to hold?", "choices": ["tire", "plants", "hose", "tools"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512556.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572642, "question_id": "BTs2dTPnmNYsKpZVx9VDai", "question": "This road is vulnerable to what kind of disaster?", "choices": ["tsunami", "terrorist attack", "blizzard", "forest fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572642.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200323, "question_id": "BTvZTB7odB4EdZREYn2UYU", "question": "What will the bear do with the fish?", "choices": ["sell it", "eat it", "lose it", "hide it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200323.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 502429, "question_id": "BW4YUMMs5rzrJEfT7cJU2o", "question": "What is he doing with the frisbee?", "choices": ["stealing it", "hiding it", "touching it", "tossing it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000502429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400760, "question_id": "BXNe4m4jtuiZD4jKRtHwwZ", "question": "What style crust is used to create this pie?", "choices": ["thin", "whole wheat", "crispy", "pan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50384, "question_id": "BXNo784q4S6D2k3ikFw77n", "question": "What kind of shorts is the boy wearing?", "choices": ["sweat shorts", "basketball shorts", "jean shorts", "board shorts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050384.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559397, "question_id": "BYrPmnUu4x97FYwduDtSeM", "question": "What kind of bottle is on the counter?", "choices": ["tea", "juice", "water", "beer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559397.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343522, "question_id": "BZSPQdKGickieSyQdb9e5F", "question": "What type of animal is shown?", "choices": ["reptile", "aquatic", "domestic", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343522.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553257, "question_id": "BbJhdyPUz8sun8XrH26ojW", "question": "What would make these more comfortable?", "choices": ["shade", "cushion", "balloon", "stick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 48755, "question_id": "BcNjoMbMsu432QBwYsLCrZ", "question": "What is the surface with holes used for?", "choices": ["water jet", "sound speaker", "microphone", "ventilation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000048755.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569473, "question_id": "BciAYHbAqWs2kdqgLERKjP", "question": "The children seen here are likely what?", "choices": ["parents", "enemies", "twins", "unrelated"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569473.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 479114, "question_id": "Bdy4EfoLh8Jz7cvsbfDSJs", "question": "What allows these skaters to see?", "choices": ["lights", "moon", "stars", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000479114.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487896, "question_id": "Bg3Fv6V6AYEAtRxiQ25Nnk", "question": "What type of board is this?", "choices": ["cutting", "bread", "surf", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487896.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355187, "question_id": "BgtGf4Qoqvv99Ui2JjxmWG", "question": "What kind of animals are shown?", "choices": ["reptile", "domestic", "aquatic", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355187.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53667, "question_id": "Bgy8AsNzbgeLniVqf5kujL", "question": "What type of vehicle is being dragged by the truck bed?", "choices": ["car", "airplane", "bus", "train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053667.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 242930, "question_id": "Bh6Z8897F6bBT4pVwixxn3", "question": "What direction do these open?", "choices": ["up", "down", "right", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000242930.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214602, "question_id": "BhWjcQkD8YuohCHuKW6PKK", "question": "Which transportation mode here is more suitable for crossing the water here?", "choices": ["bike", "car", "boat", "taxi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214602.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187179, "question_id": "BhfJypxtqNA9KUW2EXeA6i", "question": "What part of the bird's outsides takes up a lot of volume but weighs very little?", "choices": ["feathers", "eyes", "beak", "bones"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000187179.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151769, "question_id": "Bigb9PWUUVEHqMnuNjcX2b", "question": "What are the zebras doing?", "choices": ["eating hay", "drinking water", "rolling over", "laying down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357766, "question_id": "BirJdFL3nbjeW2nG3kJQ7R", "question": "What is part of this food?", "choices": ["salmon", "ribeye steak", "roe", "cheese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482289, "question_id": "BitiaSfaFKi7qxCVtQRgvh", "question": "What is the base sauce used?", "choices": ["ragu", "tomato", "pickle", "cheese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92150, "question_id": "BkKnVVNTadQ8iNDUJKM4Jp", "question": "What would keep the zebra secured if it jumped over the first fence?", "choices": ["zoo keeper", "bushes", "second fence", "tree line"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572910, "question_id": "BkPFjSzjZm6YDQnL7QenJX", "question": "Why is the skateboard sitting there?", "choices": ["cameraman placed", "lost", "abandoned", "for sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572910.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286383, "question_id": "BkpVKsot34xzUrmoTqeqiU", "question": "What is the person walking on?", "choices": ["wooden boards", "flames", "snow", "stones"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286383.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 14390, "question_id": "BmPsw5PdTAaVnagvorR22D", "question": "Why is the clothing on the bench?", "choices": ["for sale", "cameraman placed", "abandoned", "stolen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000014390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26145, "question_id": "Bmhyeaxa8Nsy6nXWKjKJVs", "question": "What kind of mammal creature is on display here?", "choices": ["goat", "cow", "donkey", "zebra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026145.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69964, "question_id": "Bn5kpVQfAXfHBKj3znznnC", "question": "How long is the average fishing pole?", "choices": ["5 feet", "6-8 feet", "4 feet", "9 feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000069964.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38600, "question_id": "Bnv73yoEYxUgkzwENSMuua", "question": "The founder of this wetsuit company previously worked with what company?", "choices": ["dolphin wetsuits", "victory wetsuits", "raven wetsuits", "hawaii wetsuits"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82200, "question_id": "BoJcaLVMrWNMawZeAKEg3x", "question": "What is the man on the fence running away from?", "choices": ["water", "anteater", "horse", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082200.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 33778, "question_id": "BpdrbVqQ7UqxW5pwFAq4MA", "question": "What is on the floor touching the tub?", "choices": ["sink", "pouch", "trash can", "rug"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000033778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558218, "question_id": "Bpq5sXPLBiMvkamNpVvVci", "question": "The white seating here is useful because of what added feature the bench lacks?", "choices": ["back", "resting spot", "seat", "arms"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517319, "question_id": "Bq4eDxaiMXKXNJNUs7iAMf", "question": "What are these brushes used for?", "choices": ["hair", "toilet", "teeth", "dishes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517319.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548414, "question_id": "Br9jNir7aDanCHijQCzbky", "question": "What is the name of a person who specializes in shoeing these animals?", "choices": ["veterinarian", "farrier", "hoof smith", "podiatrist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258406, "question_id": "BsNnjQYdeosAoS97SkdSvZ", "question": "What is on top of the head?", "choices": ["bonnet", "hair", "tag", "hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000258406.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319493, "question_id": "BtSMcSy8gj9zkjrdoVaUER", "question": "Which utensil is on the right side?", "choices": ["fork", "spoon", "knife", "chopsticks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319493.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465328, "question_id": "BuJnY2ScNSahho27zuMnYs", "question": "What type of vehicle storage facility are these vehicles at?", "choices": ["railyard", "airfield", "dock", "parking lot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465328.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84955, "question_id": "BuUoeXsh8557QHGkzDUc9t", "question": "This pizza was customized for what type of person?", "choices": ["meat eater", "vegetarian", "omnivore", "pescatarian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084955.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119738, "question_id": "BueAcaNFWPB47LY2x7F2Z6", "question": "Why is the stop sign above colored red?", "choices": ["warning", "priority", "mandatory", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119738.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193419, "question_id": "BxHGhSNgdazt7jV7HPk8bi", "question": "What weather was required for this sport to take place?", "choices": ["snow", "hail", "wind", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404052, "question_id": "BxXLDe42y6ZaP3We6SvGdg", "question": "What does the number indicate?", "choices": ["street numbering", "distance", "age", "price"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000404052.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118711, "question_id": "ByLgcNeWk3zGnJ4K5wjbFC", "question": "Which one of the following words refers to a group of these animals?", "choices": ["flock", "murder", "school", "dazzle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403044, "question_id": "ByfSspbAQDydC5bGzXM7VF", "question": "What is another city that is located in the state where this sign is from?", "choices": ["jackson", "helsinki", "reykjavik", "omsk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49049, "question_id": "BypQcpubMLCastqmCcKyNT", "question": "Where should the silver watch be kept?", "choices": ["pocket", "belt", "wrist", "neck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049049.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93761, "question_id": "C2TAPxBDHZLaHHMWeuSAdt", "question": "What topping is on the bread?", "choices": ["onions", "chocolate", "ketchup", "chili"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093761.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 106024, "question_id": "C39ouqZLJS2D2LMWmDYZAm", "question": "What is causing the polar bear to make a diving stance?", "choices": ["weather", "balance", "fish", "frost bite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000106024.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286790, "question_id": "C3L6REu8MB9Tu4YWvJHRyE", "question": "What is the snowboard connected to?", "choices": ["back", "feet", "hands", "waist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344448, "question_id": "C3QNFS6KVkj4S8JE9v2uYZ", "question": "What thing would be very hard to fit into the briefcase?", "choices": ["dog", "toy", "paper", "cloth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344448.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187867, "question_id": "C42Jttm3UPDFX9mvXJZK8s", "question": "What is the symbol on the wetsuit representing?", "choices": ["wave", "rain", "sun", "cloud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000187867.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503198, "question_id": "C7qRLGArJPfWXw4tfxbvHq", "question": "Where is the white bird that is in the lead?", "choices": ["race track", "beach", "water", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452840, "question_id": "C8XoxnRwA9ccWepvfrXRq2", "question": "What would happen to the room if you expanded the white object by the window?", "choices": ["get brighter", "get dirty", "get colder", "get darker"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000452840.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292960, "question_id": "CAMv8Fh52BffDsivrE8oZF", "question": "What is the this boat typically used for?", "choices": ["fishing", "living in", "transport", "tours"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000292960.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534544, "question_id": "CAjHnJCcKjZUui2NDajP4D", "question": "What sort of sheet is needed here first?", "choices": ["fitted", "flat", "electric", "duvet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534544.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460449, "question_id": "CBVKRPDKBmhjyfxHtQi7mz", "question": "If this person were to fall what part of their body would hit the water first?", "choices": ["legs", "head", "back", "stomach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460449.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235048, "question_id": "CBVhpwL43jnQ4zcVFNUNBw", "question": "In what unique way can this animal sleep?", "choices": ["standing up", "with lions", "on trees", "underwater"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235048.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32725, "question_id": "CDHscXzCUgPvQNmhPs9ebB", "question": "How are the girders of the bridge held together?", "choices": ["welds", "glue", "mortar", "rivets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032725.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365712, "question_id": "CDNFkfDvHwAgsMvXgZ762j", "question": "What is the name of the foot wear being worn by the boogie boarder?", "choices": ["flippers", "water shoes", "flip flops", "crocs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365712.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532328, "question_id": "CDdDTVxreUcLJs2aajXTAg", "question": "What kind of face is on the TV?", "choices": ["happy", "angry", "sad", "confused"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532328.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493324, "question_id": "CDgRZPkxe28R3uCxvpoUA7", "question": "Where is this palm tree located?", "choices": ["northern canada", "hawaii", "hermosa beach", "suffolk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493324.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379008, "question_id": "CGbqqC7p5VEdwvkNstY8wS", "question": "What time is shown on the clock?", "choices": ["noon", "152", "midnight", "200"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319493, "question_id": "CH66S55isSaLyG2tMtTLum", "question": "Each slice is roughly what percent of the pizza?", "choices": ["25", "50", "ten", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319493.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 437730, "question_id": "CHTdkFcYuE36o4Z7khNdLR", "question": "How many of the giraffes are most likely adults?", "choices": ["three", "zero", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000437730.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 238066, "question_id": "CHrZC7rLGEgxVJb5hYsaKf", "question": "Why is the alligator here?", "choices": ["is lost", "cameraman posed", "confused", "hungry"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000238066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540745, "question_id": "CHtzyeo9RqwhXPzb5BYP6Q", "question": "What is he likely looking at?", "choices": ["audience", "boots", "ground", "judges"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41131, "question_id": "CJM9b7hHumgem8h6Q53NKk", "question": "What is he doing?", "choices": ["stealing board", "seeking shelter", "resting", "leaving beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000041131.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40626, "question_id": "CJqVNX4DUNZwUJgHjpPvc7", "question": "What type of screen is shown here?", "choices": ["virtual", "art", "touch", "door"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 426211, "question_id": "CKXZHopKMDAMrP4k87kAjD", "question": "Why do people usually hang things in this manner?", "choices": ["for decoration", "keep safe", "to dry", "for storage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000426211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562065, "question_id": "CLRNsy9r7t6jcbfyt27S5X", "question": "Why are the zebras lowering their heads to the water?", "choices": ["to drink", "to snorkel", "to dive", "to submerge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258281, "question_id": "CLtjfNDCLMBiJfJjb3NqSZ", "question": "These animals are most similar to what other animals?", "choices": ["frogs", "horses", "rabbits", "wolves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000258281.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 408128, "question_id": "CLu5MeU946tEH6gdwXDp3P", "question": "Why is the fire hydrant spewing water?", "choices": ["broken cap", "pressure relief", "vehicular damage", "cap vandalized"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000408128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535683, "question_id": "CNqG5cMsshDPNXsKHrguNA", "question": "What is this dog trying to do?", "choices": ["run", "sleep", "eat", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535683.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 116676, "question_id": "CNzFhpaQKvCaTZ4ZCTtusJ", "question": "What is the picture on the wall close to?", "choices": ["cat", "refrigerator", "ankle monitor", "door knob"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000116676.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363402, "question_id": "CQTYjG5jX7buuXd5TU39t7", "question": "What would make these items tastier?", "choices": ["soap", "arsenic", "dressing", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363402.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330714, "question_id": "CQrsfyoXpqCJyQsVoxd4gm", "question": "Where is the second entity that is mentioned based?", "choices": ["saudi arabia", "afghanistan", "united states", "iraq"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 361826, "question_id": "CRMDHXWDRubEiCYUXidJRG", "question": "This animal would be classified as what type of eater?", "choices": ["pescatarian", "carnivore", "omnivore", "herbivore"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000361826.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547390, "question_id": "CSu3W6yj5LRdMwdUzJFLJV", "question": "What is the vehicle traveling on?", "choices": ["dirt", "bricks", "tracks", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189175, "question_id": "CTDYQoR4J2iQ9vJZ9En2n4", "question": "What do the animals look like they are about to do?", "choices": ["hop", "kiss", "slither", "fly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189175.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470253, "question_id": "CUDSowbLoGLh25dJaX48Vj", "question": "What animal might have been killed to create an aspect of the musical instrument seen here?", "choices": ["trout", "elephant", "dinosaur", "rhino"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470253.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491027, "question_id": "CWXkEC5Mb9FLz4RpAaXwtN", "question": "The woman is holding what item?", "choices": ["drill", "tennis racquet", "basketball", "spray can"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000491027.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38903, "question_id": "CWpeGZKeALpXQMVCCFt4Aa", "question": "What material is the round table made of?", "choices": ["bamboo", "wood", "metal", "marble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038903.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 216452, "question_id": "CXWUnwbKpnVRh2ayXSgpiz", "question": "What would be the best way to classify this type of pizza?", "choices": ["cheese crust", "thin crust", "extra crust", "deep dish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000216452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463375, "question_id": "CXgqgVTEJtv4xkeoZkV5sb", "question": "What word best describes this person?", "choices": ["short", "hefty", "old", "gigantic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 361471, "question_id": "CYbNbAiWQg3Q7VABLtpSo8", "question": "What is the potential danger faced by the animals?", "choices": ["volcano eruption", "earthquake", "car accident", "tornado"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000361471.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73457, "question_id": "CZCJSmDZf9HPrqkxxFc7Lk", "question": "Where is the person who owns the cat going today?", "choices": ["office", "home", "no where", "trip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073457.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87365, "question_id": "CakeA4ePBsA58TGN9MBSpL", "question": "What is this sandwich most likely devoid of?", "choices": ["cheese", "meat", "onions", "roe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087365.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262666, "question_id": "CanpYn2tQWa6i3PpjuiHUP", "question": "What is the cat doing on the chair?", "choices": ["resting", "drinking", "eating", "grooming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262666.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503027, "question_id": "CbaNdmJQk6aNfYCFUUqkcJ", "question": "What do you wait for at the blue sign?", "choices": ["plane", "train", "taxi", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503027.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527168, "question_id": "CcYQhriJr7b8tpkHmVc2R7", "question": "Where does the door beneath the monitors lead to?", "choices": ["kitchen", "outside", "basement", "storage closet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527168.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97388, "question_id": "Cdps8MbAwejPCcAqnWzsjy", "question": "In which environment is this bear most comfortable?", "choices": ["equatorial", "frigid", "warm", "tropic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127591, "question_id": "CedD2X7tpo5DCqsgdnET7Q", "question": "In which temperatures would this creature be more comfortable?", "choices": ["boiling hot", "sultry", "heat", "cold"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127591.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399056, "question_id": "CfUqsjeVvL5JLL9v7MD6VS", "question": "What are the boxes on the right made from?", "choices": ["wood", "cardboard", "plastic", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320341, "question_id": "CfapZC3fMHtao9VWshdnSj", "question": "The clouds most likely suggest what weather is about to occur?", "choices": ["wind", "tornado", "sun", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320341.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447481, "question_id": "CgQKnYTaJLsyzsRKsu3Qob", "question": "The name of the mouse brand is derived from what languages word for software?", "choices": ["afrikaans", "french", "thai", "tagalog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447481.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175511, "question_id": "CgRWNx3fdeDErXzbRGCLkx", "question": "What is the metal bit on top of the clock tower called?", "choices": ["weathervane", "compass", "lute", "flue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8663, "question_id": "CgspHLuhn2fvpKC54CpRkk", "question": "What is this person listening to on their phone?", "choices": ["movie", "playlist", "podcast", "radio"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154162, "question_id": "Cj9HH5YTfWDUZPses2sm8k", "question": "What type vehicle does the person taking this picture own?", "choices": ["scooter", "bicycle", "truck", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444938, "question_id": "CjyYoAX5mG2HUHu3J8sjdg", "question": "What are the last two numbers on the bottom of the bus?", "choices": ["54", "68", "85", "79"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444938.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324257, "question_id": "CjyqcDFoT9JsZgETgy97SF", "question": "What is the largest number that appears at the highest point on the bus?", "choices": ["nine", "five", "six", "seven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000324257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90979, "question_id": "CkipEqWNz6L2VoYsQESZFs", "question": "What is the man doing?", "choices": ["eating", "birdwatching", "stealing", "swimming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090979.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 23324, "question_id": "CmCiEbtyUCciX9RjeLjvo5", "question": "What purpose does the weathervane on the building provide?", "choices": ["coordinates", "wind direction", "bird repellent", "bug repellent"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000023324.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2311, "question_id": "CmnrB9HtXA3SwMdZ9tmEqr", "question": "Which side is the cat above staring to?", "choices": ["up", "right", "down", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002311.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114065, "question_id": "Cmo8idaPDJtvosf3EYZtpA", "question": "What type of telephone is being used?", "choices": ["rotary", "landline", "pay", "cellular"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79336, "question_id": "CmpsX4932S2gzR5SGWUDs9", "question": "What might cause more fog in this airport than in many other ones?", "choices": ["fog horns", "smog", "forest fire", "ocean nearby"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079336.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 240724, "question_id": "CmvUGhMUNrjSdUhH9j9VMJ", "question": "What can the item in the dish be used for?", "choices": ["wiping", "cleansing", "eating", "drying"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000240724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410087, "question_id": "CmwAkL335gffKQ8WRssD8j", "question": "What are these people going to do?", "choices": ["donating stuff", "lending money", "borrowing money", "selling stuff"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40491, "question_id": "Cov3i8G2zGuqE59ngHnrNH", "question": "What is required to maintain this position?", "choices": ["balance", "tidal wave", "quiet", "noise"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040491.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513272, "question_id": "CpVGx8nCcRjXXKkTHjYx69", "question": "The train is powered by what type of fuel?", "choices": ["steam", "diesel", "coal", "electricity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513272.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289542, "question_id": "CqJXGEnFRgiTYYumqpwLQK", "question": "Which direction should the photographer travel to stay on Darkwood?", "choices": ["right", "back", "left", "straight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363984, "question_id": "CqxccFJbdUuWrPgmJfiRMs", "question": "The player kicking the ball here has a goal of keeping it away from a team member with what team colors?", "choices": ["gray", "none", "pink", "dark red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363984.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171938, "question_id": "CrMXZKntkL5a69GFptqsBU", "question": "What type of skiing is this person doing?", "choices": ["speed", "slalom", "downhill", "cross-country"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171938.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467371, "question_id": "CrrVMruYCBMogsPxeuk5RZ", "question": "Which branch of the military has offices here?", "choices": ["navy", "marines", "air force", "army"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401763, "question_id": "CruKvaiPhecFv85xGLqij3", "question": "What brand's sticker on the skateboard is closest to the ground?", "choices": ["airwalk", "dc", "vans", "osiris"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401763.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221148, "question_id": "CtA3eFdd3dkm7ZUzU5zZ2a", "question": "What is the most likely continent for this location?", "choices": ["australia", "europe", "asia", "antarctica"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221148.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56757, "question_id": "CvkPUpPeSQWXcV7bGx9Ejt", "question": "What breed of dog is this?", "choices": ["pitt bull", "chihuahua", "boxer", "poodle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458806, "question_id": "CxBQ7Vbnp7bjA9VV7vDTZw", "question": "What is in the tub that cats are normally afraid of?", "choices": ["shampoo", "water", "ducks", "shower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 568124, "question_id": "CxCNfokn3CN4BsR5J27p8z", "question": "What part of the board loosely matches the color of the ocean?", "choices": ["text", "edges", "fin", "background"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000568124.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321067, "question_id": "CxLL2ea3kok87ntfPYqkMj", "question": "Which country is this Airline based in?", "choices": ["germany", "bulgaria", "usa", "netherlands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 217101, "question_id": "CzMLy4aZZeEWcdMEi7iFHQ", "question": "What are the top buds of the flower just beginning to do?", "choices": ["expand", "sprout", "grow", "bloom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000217101.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404498, "question_id": "CzY9ezk3pxuPVid9UpgBFa", "question": "What is the child doing with the object in her hands?", "choices": ["brushing teeth", "eating it", "washing hands", "painting it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355390, "question_id": "CzsvP7KAcGJ4XD4HtThpDu", "question": "What is guarding the clock tower?", "choices": ["gate", "helicopters", "dogs", "guards"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189201, "question_id": "D23oMgJrCFFoySB9WtUKXe", "question": "What are the people playing with?", "choices": ["basketball", "hockey stick", "frisbee", "kite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189201.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139287, "question_id": "D3dP8hU22TUuiVt8Frghxq", "question": "His skirt is meant to resemble one made from what material?", "choices": ["flowers", "grass", "pineapple leaves", "papyrus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139287.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36076, "question_id": "D4Es4R4S6RCL2GfzsrgFPv", "question": "What kind of material is on the back of the skulls to allow it to stick to the wall?", "choices": ["adhesive", "glue", "chocolate", "gravy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129732, "question_id": "D4n4dGytbxDE2qUWNvcqz3", "question": "Which handedness does this player have?", "choices": ["none", "right", "left", "ambidextrous"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129732.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488010, "question_id": "D4vhDSy9GfxLwRPkbbLsav", "question": "What is this person about to do?", "choices": ["watching video", "selfie", "texting", "replying email"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488010.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311833, "question_id": "D6PwoJkY6tLLQhvJJAkQGk", "question": "What color is the large item?", "choices": ["gold", "green", "silver", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311833.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50154, "question_id": "D7MvwuYgNPGcMeAX7qpWC4", "question": "What level of tennis does the player probably play in?", "choices": ["college", "pro", "high school", "amateur"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440276, "question_id": "D7TmvD4QvGTkYX8anahGyu", "question": "What is the boat in the upper right called?", "choices": ["yacht", "kayak", "speed boat", "sail boat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193354, "question_id": "D8GBQTK6XoTqKHQJFmDTEh", "question": "Why is he shoveling snow?", "choices": ["his job", "exercise", "uncover snow", "cleanup property"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118993, "question_id": "D8bSFgLkABh4wD7gQXqaXZ", "question": "What word can be spelled from using three or four of the letters on this specific red and white sign?", "choices": ["haw", "slam", "pots", "work"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420896, "question_id": "D8r3muBa3E9vn6GfzJMQrf", "question": "What digits are missing from the phone number?", "choices": ["21", "61", "53", "25"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420896.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337493, "question_id": "D9sreUYkkahEgQEkgGdPPe", "question": "What type of dog breed is this dog?", "choices": ["siberian husky", "rottweiler", "bulldog", "poodle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337493.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264622, "question_id": "DA7fjLUDUtXhFq2QF8Xkme", "question": "How deep is the water under this boat right at this time and tide?", "choices": ["1 inch", "3 feet", "20 feet", "4 fathoms"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000264622.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549812, "question_id": "DARCMqb27BjvZ8jhfwRTSJ", "question": "What is the move the skier's is making?", "choices": ["pike", "snowplow", "splits", "flip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549812.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54035, "question_id": "DARvUfLn4wmFoa3XtKGvJn", "question": "What is the name of the activity the boys are doing?", "choices": ["wake boarding", "boogie boarding", "surfing", "skim boarding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054035.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223190, "question_id": "DAmaRwqaVPeg9QSgsUL8TE", "question": "What is the boy swinging?", "choices": ["stuffed animal", "shirt", "pizza", "baseball bat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223190.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396503, "question_id": "DBNEWjFHZUznqqbktS7mZF", "question": "What is near the window?", "choices": ["picture", "book", "donkey", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396503.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198999, "question_id": "DByV49vCqcu4CFH3e6gZNM", "question": "What type of region do the giraffe live in?", "choices": ["forrest", "tundra", "desert", "mediterranean"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000198999.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267922, "question_id": "DDAZfpTdY3MoDBivtVTfUb", "question": "What is the ad hoping to prevent?", "choices": ["smoking", "aging", "stealing", "pregnancies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190001, "question_id": "DDkyBXsT7QiTAFBGDqwg7z", "question": "What matches the color of the bear?", "choices": ["bark", "lemon", "strawberry", "lime"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190001.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 94309, "question_id": "DE8Y4foZVtZZeEYEvNNTkE", "question": "What is an occasion you might wear these shoes at?", "choices": ["job interview", "sports game", "wedding", "funeral"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000094309.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493348, "question_id": "DEFmtHfV2yPuN6VvWFWf82", "question": "Why is the board vertical?", "choices": ["falling", "sliding", "showing off", "no control"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572394, "question_id": "DEZ8MUjc9f8pmdRnDAdmUC", "question": "If someone were thirsty here which door would they open to fill their glass with water?", "choices": ["upper", "lower", "right", "none"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000572394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566182, "question_id": "DFuzwkrNdZ2vGWoDWmmTYG", "question": "The appliance in the corner is for making what drink?", "choices": ["coffee", "beer", "juice", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566182.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549321, "question_id": "DHTcyWnCbQzhH7SvaewTyp", "question": "How many people are waiting for a turn?", "choices": ["seven", "eight", "six", "nine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549321.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537471, "question_id": "DJ3JotvqKuVzA8BXkAHwgT", "question": "What object kills the most people in this image?", "choices": ["hat", "cigarette", "bookbag", "wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537471.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5242, "question_id": "DJmrjVShjB94vo555YhuzZ", "question": "Behind the photographer lies what?", "choices": ["ocean", "mountain", "nothing", "rock face"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005242.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 580948, "question_id": "DKu9PCv8jEABXTFJAPRT8M", "question": "What word best describes the two surfers and their activity?", "choices": ["teacher-student", "exclusive", "adversarial", "mutual"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000580948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477554, "question_id": "DKw9nzPWbCEVZF64PQeo6B", "question": "What hour is the clock saying?", "choices": ["four", "six", "five", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477554.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386839, "question_id": "DLKVNHRmksRWBLFC7Wu9Dq", "question": "What was this bench likely treated with?", "choices": ["oil", "stain", "paint", "varnish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386839.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416507, "question_id": "DLcGgpzVMrRKas3YQyYLqX", "question": "What are the stones spaced out in the grass supposed to be?", "choices": ["sculptures", "modern art", "parking spots", "headstones"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000416507.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448727, "question_id": "DNHwG6QG24miPGVu9SUSBz", "question": "What is under the umbrella?", "choices": ["bench", "human", "cat", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448727.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393307, "question_id": "DPDk4P2PNdpqB6eDhzvWbB", "question": "What is the person seen standing in the middle of?", "choices": ["grass", "waves", "sand", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393307.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342443, "question_id": "DPyAdkxWuwrjhMu8BQsjy5", "question": "At least how many species are seen here?", "choices": ["ten", "three", "eight", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29638, "question_id": "DQFZHTA3e4cgEgMb6mh72e", "question": "What activity is the cat performing?", "choices": ["hiding", "drinking", "running", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153801, "question_id": "DQg6y6aJzwA7SeXUkjzmDd", "question": "What brand is the bus?", "choices": ["mta", "luthfansa", "alitalia", "interbus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153801.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117575, "question_id": "DQgc5iaRT2u5NXDdcZGPoc", "question": "What is the last name of the current president of this company?", "choices": ["popov", "federov", "semenov", "petrov"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000117575.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218408, "question_id": "DRKMxqke5h5YteXR7KKgxs", "question": "What might this animal do from this rock?", "choices": ["swim", "fish", "sink", "sunbathe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218408.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507639, "question_id": "DRLatoxxhuWcQcEMMvCkDa", "question": "What kind of transport is displayed in the picture above?", "choices": ["road", "water", "railway", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507639.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385278, "question_id": "DSF9fqt9GJhssNcU5PW9e8", "question": "The design on the white sign resembles what?", "choices": ["bacon", "heart", "egg", "walrus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385278.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87469, "question_id": "DSqkpcrxEj2nkcGPDqifFL", "question": "The pallet table here is likely on which type property?", "choices": ["mall", "private", "city square", "police"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486881, "question_id": "DTc7jb88o4MWkLtcXmgP4e", "question": "Which grand slam event is played on this surface?", "choices": ["french open", "wimbledon", "us open", "australian open"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175475, "question_id": "DUfKQi5VEgWxWb8NiYL5RA", "question": "Where is this donut being eaten?", "choices": ["car", "bike", "bus", "store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222937, "question_id": "DV7SXuDE9uALx6TxvmDrM8", "question": "Why is there a fence next to the giraffe?", "choices": ["keep in", "block traffic", "for children", "deter people"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76559, "question_id": "DVHt8EJXvi8tBP7QNrN5yy", "question": "What are most of the birds shown here presently engaged in?", "choices": ["resting", "breeding", "eating", "laying eggs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076559.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191455, "question_id": "DVe394UgYB6ZJdA58uF2GL", "question": "The logos on the white label refer to what type of companies?", "choices": ["private investigation", "taxi", "credit card", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191455.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31492, "question_id": "DWv22PPD7Qnm8hX8P25n4X", "question": "What is the girl using to get around the yard?", "choices": ["bicycle", "scooter", "kite", "skateboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031492.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4726, "question_id": "DXhLQ2PHb768wSZMjRQzoa", "question": "These flowers rest in what element?", "choices": ["water", "silica", "oil", "lead"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420415, "question_id": "DY8dHpaes3WWdWVCTqueKm", "question": "What is the purpose of the cat?", "choices": ["confuse children", "frighten others", "logo", "attract cats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107398, "question_id": "DZcrvefK6ee5WmZR3gTBk8", "question": "What is the man pretending to be?", "choices": ["dancer", "magician", "surfer", "robber"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107398.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299056, "question_id": "DbS7vKina3PKbjrYDF5xkh", "question": "What is she doing while she holds the phone?", "choices": ["eating", "sleeping", "smiling", "dancing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4868, "question_id": "DbmLrsTGUNDCu6HiC8B4Yw", "question": "What kind of tennis game is being played?", "choices": ["halfways", "doubles", "singles", "whole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004868.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411394, "question_id": "Dc88txtLLP64KWsxPY6Hv5", "question": "After picking up groceries where would someone riding the blue bike here put them?", "choices": ["uber", "basket", "ups", "red bike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570370, "question_id": "Dd6zxw2E6TxoR5hu9QqLbf", "question": "What is likely to be found here?", "choices": ["squid", "diamonds", "garbage", "treasure"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570370.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262520, "question_id": "Df6b9iGpcuieNEeQaUX3wv", "question": "The man here was likely working with what farm product?", "choices": ["hay", "coconuts", "figs", "cacti"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346400, "question_id": "Dh7vGnVx7WBazYTPi7aPRr", "question": "Which of these objects is closest to the photographer?", "choices": ["tree", "wall", "sky", "bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346400.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51451, "question_id": "DhCuVdkTffKzPbTJLzVSWT", "question": "What is contained in the gray colored band shown here?", "choices": ["snow", "mist", "marijuana vapor", "smoke"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051451.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37448, "question_id": "DhGwmhGoDgGtztTzUv6wHa", "question": "Where is the sun relative to the direction the camera is facing?", "choices": ["left", "in front", "behind", "below"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037448.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467072, "question_id": "DhV93dP8D4Mdd2wrjmtoCX", "question": "What expression is this person showing?", "choices": ["happiness", "anger", "pain", "sadness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576762, "question_id": "Di2mxW6MCVCQQpqkmXona5", "question": "This person looks most like what famous person?", "choices": ["warwick davis", "beth ostrosky", "moms mabley", "seth rogen"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000576762.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158405, "question_id": "Dkmdm7DRPvi6g4zdrsyStM", "question": "Why is the woman wearing boots?", "choices": ["acid", "water protection", "heat", "fashion show"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158405.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19648, "question_id": "Dms68DqrAtkPubgRMjYYJf", "question": "Why is the smaller plane away from the gangway?", "choices": ["broken", "special passengers", "too short", "for servicing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277285, "question_id": "Dn4vi5iAJKkKBd3ot4EYNT", "question": "What age group does the surfer appear to be in based on facial features?", "choices": ["young adult", "senior citizen", "middle-aged", "teenager"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184117, "question_id": "DnQHoLnpmB9JCAXXTQJPvp", "question": "What is this zebra trying to do?", "choices": ["rest", "bathe", "attack", "play"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184117.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458815, "question_id": "DnSTFCfnJHLEGgU4rTCvDY", "question": "What does the prankster who altered this sign try to infer the beverage will give you?", "choices": ["can", "wings", "heart attack", "shoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458815.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193111, "question_id": "DnTstf7wxAxmCgcnYmh9Kc", "question": "Where are these animals located?", "choices": ["bedroom", "bathroom", "backyard", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342997, "question_id": "DnZqjW7zbW3C45LPdiW4ek", "question": "The train parked in what type of railroad structure?", "choices": ["railway turntable", "train station", "train depot", "train shed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342997.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109413, "question_id": "DnreBL3doQYfh6FEcYwvgu", "question": "What kind of person would have no trouble finding a spot to park and dine here?", "choices": ["cyclist", "boater", "car driver", "unicyclist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109413.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456764, "question_id": "DntDYVcbxBpYW3BRVeD47R", "question": "What do these animals like to eat?", "choices": ["humans", "turtles", "hay", "eagles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456764.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187984, "question_id": "DooVSEUTKABCUczrzZQJ7r", "question": "How many non regulated items have been added to this signage?", "choices": ["one", "over 5", "none", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000187984.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550803, "question_id": "Doyszv8AvCvid8wzakgiLr", "question": "What type work is being performed here?", "choices": ["wood", "crafting", "water craft", "yard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550803.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54207, "question_id": "Dp6mJP9DmvyuVzoqWHovrt", "question": "Where is this bear located?", "choices": ["arctic", "desert", "seaside", "tropics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054207.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195030, "question_id": "DpzwfQi6YgZW6CeBg5Nm2e", "question": "What animal is usually found in this environment?", "choices": ["pigeon", "polar bear", "seal", "camel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195030.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317285, "question_id": "DqZKRJswnJzmQ6sBWHdw2Y", "question": "Where does the bird live?", "choices": ["outdoor zoo", "local park", "wild", "pet store"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000317285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397316, "question_id": "DrvBdyA2F3RAEAQYGQjH2d", "question": "What number needs to be added to the number at the top front of the bus to get to 100?", "choices": ["ten", "45", "25", "36"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397316.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52144, "question_id": "DtCK4eHAPLwo4B2Po6nGkt", "question": "What is the train driving on?", "choices": ["grass", "dirt", "railroad", "street"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80440, "question_id": "DtFGe8snLHNKATgmVQo4S2", "question": "What appearance do materials here resemble that construct a sort of alcove?", "choices": ["rock", "sand", "grass", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080440.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229496, "question_id": "DtLJ69zq2RZTozHMJ5YeBb", "question": "What mode of transportation has been stitched?", "choices": ["bus", "skateboard", "car", "bicycle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229496.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259096, "question_id": "DvaaA3kEAjPYqPVEd22pYP", "question": "If someone were out of breath what would it be easy to do here?", "choices": ["lay down", "spin", "sit", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259096.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545631, "question_id": "Dvw538JNyEGiuizotZTabG", "question": "In what general type of zoning area is this fire hydrant located in the city?", "choices": ["commercial", "residential", "agricultural", "industrial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424415, "question_id": "DwDqYrVf8adUCinDSY8zKw", "question": "This plane belongs to what group?", "choices": ["navy", "ryanair", "delta", "benedictine monks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135581, "question_id": "DwFUQDrYxCd5CwmrrK8754", "question": "Why is a water bottle under the laptop?", "choices": ["heat", "fashion", "power", "energy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575653, "question_id": "DwNjANUVpuam7PFeiGyiYZ", "question": "How many animal specie?", "choices": ["two", "three", "one", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363855, "question_id": "DxVsvLp3Gg64tfVvEYnX59", "question": "What shape is surrounding the words on the side of the building?", "choices": ["triangle", "diamond", "circle", "square"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363855.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563843, "question_id": "DxdTdRMeu6zKKVMayntYP9", "question": "What type of computer is on top of the bed?", "choices": ["tablet pc", "laptop", "tablet", "slate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563843.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308640, "question_id": "DyXpFXPNxjJWR4ktHh6cmD", "question": "What does the symbol on the top left corner of the phone indicate?", "choices": ["phone brand", "bluetooth", "wifi", "cellular network"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000308640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 248764, "question_id": "DznbutagymwBr8MPBufGZ3", "question": "Where can he carry the item if he wants to keep his hands free?", "choices": ["around waist", "around neck", "around ankles", "on head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000248764.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10998, "question_id": "Dzzj6nbvjhpfvDfi7WXwnB", "question": "Why is he standing like that?", "choices": ["maintain balance", "tired", "slipping", "falling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298681, "question_id": "E2EY5HB8BKhU5XQQeAxHNj", "question": "What is this cat trying to do?", "choices": ["eat", "hide", "drink", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422397, "question_id": "E2k44jNerH5oFVPufDAtHK", "question": "Who is famous for doing what this person is doing?", "choices": ["tony atlas", "tony hawk", "toni basil", "tony montana"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000422397.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303454, "question_id": "E4RpZuhcHDb8WgukKnQE7N", "question": "What is a convenience that is offered?", "choices": ["barber", "school", "pool", "toilet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303454.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524566, "question_id": "E4WebrHoxgpHPFtQwWoygK", "question": "The dog here feels what?", "choices": ["cold", "warm", "exactly right", "fear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 33606, "question_id": "E5FGvcxR7zbC6apKdb446j", "question": "What does the triangular blue sign in the above mage mean?", "choices": ["give way", "no overtaking", "pedestrian crossing", "stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000033606.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472385, "question_id": "E5KL6kbUdwbrNJL38ezTXf", "question": "What is the white item on the left used for?", "choices": ["fresh scent", "light", "cleaning toilet", "decoration"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472385.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76618, "question_id": "E6FzmNKPAPQ6977ABcm4S5", "question": "Which of these is an American version of one of these stores?", "choices": ["subway", "krispy kreme", "mcdonald's", "burger king"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076618.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299706, "question_id": "E7UbQBQria6po4uBJEYTNG", "question": "What kind of message is the person with the thumbs up conveying?", "choices": ["approval", "hatred", "disapproval", "disgust"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299706.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55162, "question_id": "E7pfYuvPRkgLvjdLxAzsuc", "question": "Which body part of this person is most in danger if they fall?", "choices": ["chest", "head", "waist", "knees"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486692, "question_id": "E8cvpvrAANY9StwN8rZFeK", "question": "What kind of transportation is this?", "choices": ["plane", "train", "boat", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486692.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434917, "question_id": "E9Kzc2XbAMr9Xeg2X7X7B2", "question": "What brand of laptop is this?", "choices": ["toshiba", "dell", "gateway", "apple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434917.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 383874, "question_id": "E9fuu8TsvnhGvSJozekhh9", "question": "What is the bird doing?", "choices": ["mating", "feeding", "hiding", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000383874.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 380387, "question_id": "ECNntSPwyDdUTzXDeDHzP7", "question": "The material on this beach is used to make what product?", "choices": ["plastic", "steel", "cloth", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000380387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 241978, "question_id": "ECkcaZbEAxnMGaieFfoGGp", "question": "What is holding up the bear?", "choices": ["iron rod", "gravity", "picket", "zip ties"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000241978.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510767, "question_id": "EDdQZz92VWx6oKPWHMT2qk", "question": "What is the dog sitting on?", "choices": ["pillow", "couch", "stool", "floor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 148388, "question_id": "EEy59SjGwpGdCkmfmkoihM", "question": "Why is his whole body covered?", "choices": ["stay awake", "is hiding", "stop wind", "stay warm"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000148388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416310, "question_id": "EF5R5jRnatwe9bP4G67tdi", "question": "What is this object located in front of?", "choices": ["mountain", "ocean", "building", "forest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000416310.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454461, "question_id": "EFUYnJA6vmaSFfbujAS2fQ", "question": "What company is known for making the device in the middle of the room?", "choices": ["nathan's", "nintendo", "mcdonald's", "hp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454461.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 62866, "question_id": "EGL7qR2hx3wUtzR2WmMi4y", "question": "What is the woman holding?", "choices": ["fork", "umbrella", "camera", "knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000062866.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44222, "question_id": "EH4FGvMX3KeggAy4LMbpyA", "question": "What meal could be prepared with an ingredient found here?", "choices": ["cassava", "blackberry pie", "mississippi mud", "fried chicken"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000044222.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 313994, "question_id": "EHLcC22YUKuziJZ5UuHutc", "question": "Eyewear shown here protects from what celestial body?", "choices": ["sun", "venus", "moon", "mars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000313994.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93897, "question_id": "EKfTay2CWZegzBcxmcdSwA", "question": "What is the person with the camera most likely doing in the office?", "choices": ["interrupting", "being welcomed", "being friendly", "being promoted"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093897.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 398987, "question_id": "EKvJkmiHJ9LrbqGBPdBaP3", "question": "Where is the sink that the cat is lying in likely located?", "choices": ["dorm", "bathroom", "kitchen", "laundry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000398987.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501192, "question_id": "ELeCEPzBovKpybGDKvC8w2", "question": "What is at the top of the pole he is holding?", "choices": ["rake", "umbrella", "broom", "shovel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 284998, "question_id": "ENoyfic6DzJDxHieJFVYcN", "question": "How many languages are present on this sign?", "choices": ["two", "three", "one", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000284998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66392, "question_id": "EQ7f6NVry7QnXd8wXCi2gb", "question": "What animals are known to be attracted to these and go to the bathroom on them most often?", "choices": ["snakes", "dogs", "mules", "donkeys"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411634, "question_id": "EQXeMWa42C6MtgjPJqKeNB", "question": "What type of food is shown on the paper plate?", "choices": ["hamburger", "hot dog", "fish", "chicken cutlet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411634.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384141, "question_id": "EQdoYnJxgKxHD6GhWA9syW", "question": "What is causing the steam effect on the camera lens near the surfer?", "choices": ["snow", "water", "fog", "smoke"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182744, "question_id": "ERFwS67KBKhF2L2hxQNQUG", "question": "The boarded up black panes to the left of the set of doors were once what fixture at this station?", "choices": ["pay phones", "cafeteria", "ticket windows", "store fronts"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000182744.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 1848, "question_id": "ERmroqrkGHfTDUDY5dKACp", "question": "The item seen here can be most useful how?", "choices": ["coffee drinking", "cantelope cutting", "omelet preparation", "steak serving"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000001848.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524161, "question_id": "ERsgHHSJe2ik4QC8fFQdGy", "question": "What type of hat is the man wearing?", "choices": ["beanie", "chef hat", "fedora", "baseball hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 332698, "question_id": "ETWYPpNAvEpqNkGihEKoQB", "question": "Based on the leaf color how fresh are these orange?", "choices": ["very old", "very", "old", "some what"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000332698.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146069, "question_id": "EU8CEsbzDCBi3CpmHEnpsZ", "question": "What activity could be performed on the object the cat is sitting on?", "choices": ["instant messaging", "weight lifting", "massage", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193505, "question_id": "EUrhQ7tMp8WHyefLy9D23J", "question": "What company uses the red word on the bus in their name?", "choices": ["dutch boy", "green giant", "amazon", "american express"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415584, "question_id": "EVYsTfVeMtjQsV7XMX9JFi", "question": "What year was this house most likely built around?", "choices": ["1980", "1950", "1920", "2010"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000415584.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152421, "question_id": "EW3PFERt5wdSFAFJzY73KW", "question": "What is the profession of this woman?", "choices": ["banker", "janitor", "athlete", "cashier"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152421.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 88356, "question_id": "EXsFT4FALb2svcHPoEzuWL", "question": "Where is this zebra taking nap?", "choices": ["flowers", "beach", "trash", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000088356.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514963, "question_id": "EYvyo2eD6YChihYVVmqgBZ", "question": "What hobby does the owner of the room probably enjoy?", "choices": ["karate", "rock climbing", "cycling", "skiing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 246904, "question_id": "EZJT3rQKiAQvamNrFK2jGB", "question": "What kind of vegetation are the giraffes among?", "choices": ["crops", "bushes", "flowers", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000246904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273221, "question_id": "EaBuFZZ67qYnG92iB4Y7WD", "question": "What is the athlete ready to do?", "choices": ["dunk", "dribble", "swing", "run"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342282, "question_id": "EaF7367LLHNLqbb2LkZ3km", "question": "To which continent does this plane fly?", "choices": ["antarctica", "australia", "asia", "south america"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357161, "question_id": "EbJM74KaY4v49cZrPL4N23", "question": "Why does the dog have the vest?", "choices": ["to float", "stay warm", "keep there", "fashion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 60318, "question_id": "EbaNwPgGLgc8Au8enHS7DH", "question": "What animals are behind the fencing?", "choices": ["tiger", "lion", "zebra", "panda bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000060318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310340, "question_id": "EbybFCepckgp9VVCtFHsTE", "question": "What animal are these toys?", "choices": ["horses", "dogs", "bears", "cats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454815, "question_id": "Ec9HyRS38TEjDpagBfykyC", "question": "What language are the words at the top of the clock in?", "choices": ["greek", "klingon", "french", "spanish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454815.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581585, "question_id": "EcrquK5jCKezTwWSknhZ3h", "question": "What sport are they ready for?", "choices": ["fishing", "surfing", "swimming", "water skiing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581585.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63034, "question_id": "Ed8ErUXiKLiXmNMUGJCEiJ", "question": "The item held in this persons left hand with a band goes around what?", "choices": ["neck", "back", "wrist", "ankle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063034.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 208627, "question_id": "EdBuhntyFs2Yy2aVDdFKmJ", "question": "Why does the boy have his arm out?", "choices": ["throw", "balance", "wave", "measure"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000208627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129845, "question_id": "EdX43oJmrJ7ZDGz7VsxZMm", "question": "What feature of this animal is particularly large?", "choices": ["nose", "ears", "wings", "gills"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129845.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481559, "question_id": "EfibtvpS6VPGXPrgaKWaQ8", "question": "What is the animal looking for?", "choices": ["fish", "candles", "dogs", "eagles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481559.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394347, "question_id": "Ei3SshgyXtAKKyWUraenCd", "question": "What did the dog leave behind in the sand?", "choices": ["droppings", "prints", "collar", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000394347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450520, "question_id": "EjR6jvKnbpCnpjykjT9c7F", "question": "What is the pattern on the umbrella?", "choices": ["stripes", "blobs", "plaid", "circles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000450520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296008, "question_id": "EjsVEtn9rp22KZjQzzBJGt", "question": "What is the elephant likely feeling?", "choices": ["hunger", "fatigue", "curiosity", "thirst"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73504, "question_id": "EkGmXDvQjovU4BKepDdNmf", "question": "What is closest to the window?", "choices": ["sheet", "cat", "radiator", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073504.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 247550, "question_id": "EkWJCp5QYLmMTtFKw4j2hn", "question": "What is the main color of the bird in the image?", "choices": ["green", "purple", "yellow", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000247550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484613, "question_id": "EkWjNzLvxKGN5SjuBXxk2q", "question": "What is the front zebra doing to the back zebra?", "choices": ["feeding", "biting", "yelling", "kicking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000484613.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467503, "question_id": "EmDeVbX3irS2B75UYddswc", "question": "What type of transportation is shown?", "choices": ["air", "water", "road", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467503.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 423573, "question_id": "EnL7v8sWVP6LWEZCDYcbnp", "question": "Where are these animals relaxing?", "choices": ["street", "field", "forest", "taiga"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000423573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501192, "question_id": "EnePRCNYUjUG6YZcxoTwLv", "question": "What improves by wearing those on his face?", "choices": ["intelligence", "wisdom", "vision", "hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77682, "question_id": "Eng6wEHG2jqpQjUWnLLz8Y", "question": "The name of the company at the top of this item ends in what letter?", "choices": ["y", "", "w", "h"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435511, "question_id": "EnzbF4HAQaMb8NZyPihGrU", "question": "This cat is being viewed through what?", "choices": ["porthole", "sink hole", "peephole", "donut hole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424415, "question_id": "EoRvnZktiC7FTUpoPHA6bP", "question": "What military branch is the airplane from?", "choices": ["marines", "navy", "army", "air force"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543370, "question_id": "EoaFQAjkWQrbzwQQ9KL5H5", "question": "What theme is the women trying to portray with the photo?", "choices": ["vintage", "urban", "nature", "cowboy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543370.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76060, "question_id": "EpoLJsu2FiEwsrS67qQkCF", "question": "Which item here has the longer lifespan?", "choices": ["bear", "wendigo", "tree", "human"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076060.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172469, "question_id": "EqQSBeWkqjTpjHHgnAq82d", "question": "What is the likely relationship of the big elephant to the small one?", "choices": ["mate", "mother", "unrelated", "sibling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431783, "question_id": "EruHTJDWzhAGj3MVyidDmy", "question": "What is the official name for this sport?", "choices": ["equestrianism", "gymnastics", "curling", "golf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251594, "question_id": "ErxQW663YXZxMBTTpSpRPu", "question": "What position is this?", "choices": ["sitting", "laying down", "diving", "leaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251594.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 106377, "question_id": "EsXv25JAssvMRfcApWsu9Z", "question": "How many kilometers away is the location referred to on the sign?", "choices": ["2.5", "2500", ".25", "250"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000106377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177351, "question_id": "Esd9sV63roBqLUT9mAPbKe", "question": "What is someone trying to make using the red bowl?", "choices": ["pizza", "cake", "pasta", "popcorn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177351.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 322982, "question_id": "EsrQP4q6TcahN9WMpoCHYX", "question": "What is required for this activity?", "choices": ["snow", "waves", "wind", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000322982.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226285, "question_id": "Et4DuoAboNChWrQfDCQ5Zk", "question": "The silver structure ahead means what?", "choices": ["danger", "pay toll", "keep out", "all welcome"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000226285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 327414, "question_id": "EtZxsNAmmBspgQKPkm9yXD", "question": "What can be used to describe the kind of person behind the stop sign?", "choices": ["entrepreneur", "ruler", "philosopher", "holy figure"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000327414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404562, "question_id": "Ete7CugsXUJczeE8kAnzDc", "question": "Where can a real life object like the blue one be found?", "choices": ["corporate office", "smoke shop", "graveyard", "train station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404562.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535085, "question_id": "EuHvDwWN5ANcRkMNze8GpY", "question": "What former baseball player has the last name that is on the bear's shirt?", "choices": ["doug sisk", "pete schourek", "don aase", "don baylor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202038, "question_id": "EvTLS2UBf58hvKUBfzN5hQ", "question": "The animal got to this location how?", "choices": ["escaped pen", "uber", "truck", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202038.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306237, "question_id": "EvUFcNa22NnxMA8tQHqHbG", "question": "What skateboard move is being performed?", "choices": ["reverse", "ollie", "grind", "superman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136856, "question_id": "EvqRLyZxzM6ffBBxn6rYqp", "question": "What is the man standing in front of?", "choices": ["river", "lake", "desert", "mountains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136856.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499065, "question_id": "EwJWr8cmcp5a38szGLQjPN", "question": "The man in the dark textured shirt is pretending to drink out of which bathroom fixture?", "choices": ["bidet", "toilet", "shower", "sink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155660, "question_id": "EwYETXdBUxRWKYqR9Lm9Bj", "question": "Who played the character that is mentioned on the sign?", "choices": ["christian bale", "brandon routh", "henry cavill", "klaus kinski"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566073, "question_id": "ExAEzxpYx22RcLzuZn82e4", "question": "What is mostly used to manufacture the vessels above?", "choices": ["gold", "glass", "salt", "clay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566073.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 149522, "question_id": "ExoTS3EFza9Kg33pE3bnhw", "question": "What animals are in the woods?", "choices": ["foxes", "dogs", "cows", "aardvarks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000149522.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 266201, "question_id": "EyMqnjG7Cd2UELpQoYVAKi", "question": "What number is one less than the number on the bike?", "choices": ["37", "19", "16", "30"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000266201.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424317, "question_id": "EyXNWQWk7GPUZzKjQBYz2t", "question": "Who is allowed to sit on benches here?", "choices": ["anyone", "bikers", "church members", "no body"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424317.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346285, "question_id": "EzsYfY2nqc2hbNeWL9yimh", "question": "What does Roja most likely refer to?", "choices": ["wine color", "crayons", "buildings", "country"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501040, "question_id": "F2ZkyHFkvA2PpqvexvKsZi", "question": "What is the biggest danger to these people at the time of this photo?", "choices": ["drug overdose", "cancer", "car accident", "drowning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501040.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250508, "question_id": "F2yvrkhLF2yhRV5SX4hKP6", "question": "What are the zebras doing?", "choices": ["mating", "drinking", "foraging", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000250508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539382, "question_id": "F3Nqu9MnuQhimnqbhpi58q", "question": "Where is this bear located?", "choices": ["enclosure", "vet", "circus", "fair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539382.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506423, "question_id": "F3b8c9kREd78T65NLRtjM3", "question": "What food can be made from a product that comes from this animal?", "choices": ["silk", "cheese", "almonds", "honey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144928, "question_id": "F3hx4AETzbFvVFzXtos63g", "question": "The white plane is a mini version of a what?", "choices": ["jet", "rocket ship", "seaplane", "stealth bomber"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000144928.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447963, "question_id": "F4MXzGKQteds4BpF8Fc2nn", "question": "The filling cream is probably what flavor?", "choices": ["blueberry", "strawberry", "cherry", "cabbage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 78724, "question_id": "F4dJJxKVbxLr8zTGV7ZCNe", "question": "The color seen here is cast by what?", "choices": ["moon", "sun", "man", "ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000078724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204215, "question_id": "F5xvQbALk5sVJFLxNtNw4K", "question": "Who uses this bus?", "choices": ["soldiers", "prisoners", "travelers", "students"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204215.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95917, "question_id": "F6Knvw3RctMTaPNf7RaXoi", "question": "Why are they moving uphill on skis?", "choices": ["challenging", "illusion", "trail there", "lost"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095917.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459318, "question_id": "F6pSH6uySUb9hQVmf4v8rT", "question": "What is the name of the transport system in the picture above?", "choices": ["car", "bus", "lorry", "railway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000459318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44557, "question_id": "F7fFvDuZJHSgKYfEz6eNQJ", "question": "What wrestling organization had a weekly program that shared the name that appears on the bottom of the board?", "choices": ["wwe", "aew", "roh", "wcw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98978, "question_id": "F8prTEvQGDSiw8eYixc3Hm", "question": "This is located in which city?", "choices": ["albany", "bend", "hillsboro", "portland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098978.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200223, "question_id": "F9oxRNrBdVWUNcwDFZGrWK", "question": "What part of the plane could fold up?", "choices": ["tail", "wheels", "nose", "wings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200223.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204016, "question_id": "FB5v49bLBJjS8N847ygtPp", "question": "Who is fluent in the language the sign is in?", "choices": ["mike mizanin", "jessica biel", "hulk hogan", "alexis bledel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204016.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40021, "question_id": "FB7CaDaw88hMrQt4X7rLkq", "question": "Where is this polar bear?", "choices": ["aquarium", "ocean", "backyard", "north pole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040021.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512432, "question_id": "FCpbpFayRB7HCBkygoZLh6", "question": "What does the WY stand for?", "choices": ["wary", "wyoming", "why", "way"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512432.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40546, "question_id": "FD44iBmurC4QeHrAd2CiSr", "question": "Where is this dog located?", "choices": ["yard", "home", "barn", "vet"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000040546.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453952, "question_id": "FDADzqmiXLNxNxExXuMumA", "question": "When was this bench installed here?", "choices": ["today", "1900", "2020", "2012"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512720, "question_id": "FDHgFv3ohmFDkQnLVpUiog", "question": "What is the animal in the foreground showing?", "choices": ["horn", "tooth", "stinger", "tongue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512720.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 426100, "question_id": "FDM7qgSDzaXeWvuD5cnvSj", "question": "What type of stroke is the woman performing in this sport?", "choices": ["backhand", "serve", "forehand", "overhead smash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000426100.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397456, "question_id": "FEtwrkGV9hdskzjzvM7d9z", "question": "Why is he bent over with his arms stretched out?", "choices": ["showing off", "prevent falling", "is falling", "is resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397456.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272990, "question_id": "FFV9wUHUaS6bJAS4G2Bqfg", "question": "Which one of these items does the advertised store sell?", "choices": ["computer", "taco", "saree", "lamp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272990.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204215, "question_id": "FGo5XLyf2JjsuonxeoGPRg", "question": "What kind of bus is this?", "choices": ["sight-seeing", "prison", "city", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204215.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54044, "question_id": "FHz63R5RtuKWEJpzBRQF2V", "question": "In five minutes it will be what time?", "choices": ["11", "one", "12", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487715, "question_id": "FJeGVCQdT9odf49SXXWYmE", "question": "What city is this signpost located in?", "choices": ["london", "moscow", "orlando fl", "hershey pa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487715.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404550, "question_id": "FJgK98gW8diAY4QcZU3QX2", "question": "What kind of design is on the surfboard?", "choices": ["radial", "striped", "zig-zagged", "circular"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446817, "question_id": "FKxUdqfKf8sVoBYQk4VuWw", "question": "What is the giraffe looking at?", "choices": ["tree branch", "boat", "ocean", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446817.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358955, "question_id": "FL3UStBZttsYtmE8nU4GUE", "question": "Which toilet is used by girls?", "choices": ["any", "left", "none", "right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358955.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 67081, "question_id": "FM9oyWLCSqLgBfz4VT7eVL", "question": "What is the initial for the bride?", "choices": ["j", "", "y", "c"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000067081.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 165371, "question_id": "FMxHL7CTKKg6oiqBkgjwJg", "question": "What is the blue item in the center most likely made of?", "choices": ["mud", "steel", "glass", "brick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000165371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136802, "question_id": "FQ889DCdaAumNPEjGPKnvC", "question": "What is the player ready to do?", "choices": ["dunk", "dribble", "swing", "sprint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558285, "question_id": "FR57DeBaFGycx7grXVmpmD", "question": "What are these animals used for?", "choices": ["wool", "antlers", "horns", "leather"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552477, "question_id": "FRpNzfVrXgwSqNLyVKbYn3", "question": "What is most clearly absent here?", "choices": ["red clothing", "umbrella material", "nothing", "boots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552477.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552718, "question_id": "FShuKR3z6Q4BfQrQLoUtTo", "question": "What is the key process to make wine?", "choices": ["thawing", "brewing", "fermentation", "pasteurization"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552718.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102875, "question_id": "FTCgFaQPZB6ZnTNZwjkhpK", "question": "Why would someone sit at this table?", "choices": ["to speak", "to paint", "to work", "to eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102875.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570300, "question_id": "FTGWBEv5b4mvtQg2CbNc3J", "question": "This flower vase is made up of what?", "choices": ["plastic", "wood", "fiber", "ceramic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527613, "question_id": "FTZ2gbh8pZAwaBQ7YgT9wY", "question": "What is needed for this activity?", "choices": ["sand", "rain", "sun", "waves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527613.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338485, "question_id": "FUJaGgtVVG4aGkJMrTdzih", "question": "How are these objects controlled?", "choices": ["remote", "weather", "battery", "string"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000338485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558446, "question_id": "FUSpGe7vT7JxFopuS2MoQU", "question": "If you want to find the address on the side of the box what do you need?", "choices": ["internet", "train", "map", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558446.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258087, "question_id": "FUewS5f8z9jUM9j5V8ReJs", "question": "What is the bird doing?", "choices": ["sleeping", "is lost", "resting", "foraging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000258087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163885, "question_id": "FVzSJwSbxgcDaJcMKqL5cH", "question": "Which vehicle likely was the one that caused the crashed?", "choices": ["no accident", "suv", "pickup", "third party"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163885.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490357, "question_id": "FWXypUnNCUbchXDTXfR5ku", "question": "What is preventing the bird from reaching the food inside?", "choices": ["plastic container", "its beak", "its wings", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490357.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 60318, "question_id": "FWbpunQ6TFLhvD93S3TvfF", "question": "What does this animal have on its feet?", "choices": ["hooves", "shoes", "spikes", "boots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000060318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354293, "question_id": "FWx2QyudFUsvuczpbYB7Eg", "question": "What should someone wear when diving this?", "choices": ["helmet", "box", "scarf", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354293.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467828, "question_id": "FXKkUFFHqSzFq7qD5Wc3H4", "question": "What golfer has a last name that can be found on the left sign?", "choices": ["tiger woods", "arnold palmer", "ernie els", "paula creamer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467828.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 23856, "question_id": "FXfeMRCUdA5hjzWGBWN5GY", "question": "The owner of this tie is a lover of what art form?", "choices": ["metal welding", "music", "surfing", "painting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000023856.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277657, "question_id": "FXxPqSA569x3ujH3kZBtbq", "question": "What genre of music does this person like?", "choices": ["country", "punk", "jazz", "pop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 219875, "question_id": "FYaVg9aSDxvqmDMiT7h4Sk", "question": "Why does the snow end so abruptly?", "choices": ["man made", "warm front", "carried there", "cold front"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000219875.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444946, "question_id": "FZHDjCbxyCihJeNJJKgpm5", "question": "Where is the stop sign seen here normally located?", "choices": ["bus tirewell", "bus side", "sign post", "bus hood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176865, "question_id": "Fa5dcG72CDJmGzt2ymt3WL", "question": "What is the air temperature where the man is standing?", "choices": ["chilly", "hot", "mild", "cool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176865.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193354, "question_id": "Fa9jYyJPcB3aR82iqgao3a", "question": "What would make it easier to clear the area?", "choices": ["hose", "broom", "rake", "snow blower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291823, "question_id": "FaJFMn7Qg9UowTnBp76Roe", "question": "In which country is this city bicycle and umbrella located?", "choices": ["vietnam", "japan", "hong kong", "china"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000291823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39354, "question_id": "FagqX8QJWPucxiE4cNy2MS", "question": "What is the beige building behind and to the left of the clock tower?", "choices": ["saint margaret's", "parliament square", "portcullis house", "westminster palace"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000039354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 174569, "question_id": "Fc6Awxx2hfyom29qyzJyV8", "question": "What is a usual topping on this food item?", "choices": ["chocolate syrup", "pepperoni", "relish", "gummy bears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000174569.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290740, "question_id": "FcXcrHyFwCyFEp4xUJrrSA", "question": "What color is in the middle of the main flower?", "choices": ["blue", "orange", "pink", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290740.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448968, "question_id": "FcxDTKSKJ2DWPue2B9LMdQ", "question": "What is the bird in the background incapable of doing unlike most birds?", "choices": ["swim", "move", "fly", "see"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321699, "question_id": "FdbufyHCMXdruNCXg5ybMc", "question": "Why would the planes be crossing each other's paths so closely?", "choices": ["dog fight", "air-traffic control", "air show", "accident"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321699.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111200, "question_id": "FeY6UB7LYmhiJMSqAkuQ8x", "question": "What in this tree do these animals seek?", "choices": ["birds", "insects", "leaves", "eggs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111200.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369827, "question_id": "FfJznZYGuvXfSkYh64G3Nj", "question": "What brand is the airplane?", "choices": ["finnair", "alitalia", "spirit", "jet blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369827.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 335627, "question_id": "Ffp4W9sjhfZbZXyP8YLAvB", "question": "What is unique about this animal?", "choices": ["nothing", "hooves", "ears", "long neck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000335627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576545, "question_id": "Fg2hvSMGwo6ciLTmeYoAm4", "question": "What kind of surface is the airplane on?", "choices": ["parking lot", "runway", "ocean", "highway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576545.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303353, "question_id": "Fg2iZ8c2N9eMetVoh7NMJL", "question": "What is inside the character donned in an apron here?", "choices": ["stuffing", "cookies", "guts", "marbles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 348442, "question_id": "FgEhjR42JoLeAkL4xkRnkp", "question": "What is the term for this grouping of animals?", "choices": ["storm", "bundle", "herd", "bindle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000348442.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 141778, "question_id": "FgeoHsoUT2Z4ByYehhT8S5", "question": "The driver here is about to shoot these animals with what?", "choices": ["water gun", "bow arrow", "camera", "gun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000141778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258422, "question_id": "FgxmZjcrmB7H26qkCjh3o2", "question": "What room might this bear be in?", "choices": ["living room", "laundry room", "bedroom", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000258422.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262854, "question_id": "FhRKLFn8xdmrr7NFYL6mmM", "question": "The yellow sign indicates children might do what?", "choices": ["get kidnapped", "run across", "fall down", "play pranks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306388, "question_id": "Fi6gEUdDBVqXJM5g3QQDpc", "question": "What makes the wood shiny?", "choices": ["rain", "dirt", "snow", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452946, "question_id": "Fjf3qCdigwGEN5posvWnxE", "question": "What else might you find in the room with the white thing?", "choices": ["bath", "dining table", "microwave", "sofa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162929, "question_id": "FjoJhdiBA3HJAhufy9hv6N", "question": "What is this picture besides a candle is more associated with entertainment?", "choices": ["butcher knife", "beer bottle", "chair", "cds/dvds"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000162929.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111374, "question_id": "FjogmMii2QTZtB7bAm5Zk8", "question": "What kind of items should be kept in the white appliance?", "choices": ["canned", "candy", "cleaning", "perishable"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000111374.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567745, "question_id": "FjrsMY77akF88aT6ajh382", "question": "What was most likely the weapon used in this homicide?", "choices": ["blade", "gun", "fists", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454049, "question_id": "FkmSWVi6D7wseihM6rpmVP", "question": "This downtown area is situated in which country?", "choices": ["canada", "germany", "france", "united kingdom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454049.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 241627, "question_id": "FkoM76AqYsV6jfJbgJ4Uj3", "question": "What has been used to keep people from losing the caps on the hydrant?", "choices": ["ropes", "chords", "chains", "yarn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000241627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301023, "question_id": "FkvtYf7XCwwfxFeyJVdi6u", "question": "What is lacking in the shower?", "choices": ["vents", "showerhead", "towel", "privacy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 568885, "question_id": "Fs4zjMticfkupwK7nD6arR", "question": "Why do the animals have their heads down?", "choices": ["grazing", "hiding", "sad", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000568885.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63957, "question_id": "FtVGsM22mjdXx2CExE6dUa", "question": "In what storage area of the house is the longhair cat present?", "choices": ["kitchen cabinets", "bedroom closet", "garage", "basement"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063957.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555465, "question_id": "FusrHscnt3rfkzaiCKpG5k", "question": "The brochure is for an art fair in what country?", "choices": ["taiwan", "japan", "belgium", "morocco"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555465.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399776, "question_id": "Fuwf5JEzdvLEbBbgLm9zRz", "question": "What two languages are people in this area fluent in?", "choices": ["english portuguese", "english italian", "english french", "spanish english"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399776.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461198, "question_id": "FvDfdQTzaPnjEdhMRurF7H", "question": "What is the name given to this bird?", "choices": ["cock", "duck", "geese", "hen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 523432, "question_id": "FvgHdGuhzLcPuX674GCxV3", "question": "How did this skier reach his altitude?", "choices": ["lift", "skiing", "uber", "taxi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000523432.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484691, "question_id": "FvxCjVnx8cuzRyAdCpZfdk", "question": "Why is he standing on the back of the skateboard?", "choices": ["showing off", "is confused", "is falling", "is tired"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000484691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 210390, "question_id": "Fw6YLoZunfjoXype8aH3NC", "question": "How can this phone be activated?", "choices": ["say siri", "flip it", "dial it", "operate pager"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000210390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378144, "question_id": "Fy2jHfr9JEA6uF4iBu9B8o", "question": "What video game character uses this item as a weapon?", "choices": ["milich oppenheimer", "scissorman", "norstein bekkler", "kefka palazzo"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000378144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159920, "question_id": "Fy52T6ofEccGeV6PvMP2Nf", "question": "What kind of expression is the man wearing?", "choices": ["disgusted", "sad", "focused", "angry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159920.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 203508, "question_id": "FyKihmDzXbeEjcbMnXaBxi", "question": "The owner of this home probably practices what faith?", "choices": ["buddhism", "christianity", "wicca", "islam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000203508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105965, "question_id": "FyUtTuXxs3ZRSMjF6qjTR5", "question": "People shown bending below the plane are handling what?", "choices": ["grass", "birds", "baggage", "passengers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105965.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543326, "question_id": "FytCUtBUBr9o5WeuY7tH8Y", "question": "What timepiece is reflecting?", "choices": ["clock", "analog watch", "digital watch", "sundial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543326.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218267, "question_id": "FzFQCxYt9uXkTy4aRKpShd", "question": "What is normally attached to the orange vehicle?", "choices": ["train", "missile", "rv", "trailer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506891, "question_id": "G3r8e8XAMBRBgGo9iFtagu", "question": "What color will they turn if they are put in the fridge?", "choices": ["purple", "green", "red", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506891.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424540, "question_id": "G47ySb4zWfKoRtGVGjKjqb", "question": "What brand is represented here?", "choices": ["dell", "apple", "lenovo", "asus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424540.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549681, "question_id": "G5Ws7d4qttY7YYRPh9hPkU", "question": "What does the number on the train tell people?", "choices": ["fashion", "street address", "identification", "calculation"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000549681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565425, "question_id": "G6FoNn7NHwdHasLomRYDyw", "question": "The image seen on kites here appears to be a type of what?", "choices": ["zombie", "mask", "cloud", "animal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 359793, "question_id": "G7Bn3yhChMpAoFN6diBrBE", "question": "What is displayed on the phone in the middle?", "choices": ["weather", "stocks", "banking", "map"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000359793.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180763, "question_id": "G7eaCirbqHJGKMtiDVQZdH", "question": "What location can be seen at the top front of the bus?", "choices": ["new york", "estonia", "paris", "london"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180763.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 261420, "question_id": "G7osPZXHUYRcLh7RBvVPkF", "question": "What borough in New York City is mentioned here?", "choices": ["brooklyn", "queens", "manhattan", "bronx"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000261420.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338602, "question_id": "G8aeYTYHWq8SbLmvFkzZ8S", "question": "Why are the window so dark?", "choices": ["painted", "privacy", "are fake", "dirty"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000338602.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529747, "question_id": "G8n3BQ5ryZ96Ga5KEbL8Wj", "question": "What is placed inside the meters in order to use them?", "choices": ["bills", "tickets", "marbles", "coins"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529747.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506118, "question_id": "GAE5uiAZuqCeYyzeWTaMXz", "question": "What are the arms made of?", "choices": ["stainless steel", "concrete", "wrought iron", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110789, "question_id": "GBEGtxMgzVGQTHTmF6p8Vb", "question": "What type of car would the tire in the behind the elephant be used for?", "choices": ["truck", "motorcycle", "car", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110789.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 101971, "question_id": "GBQVfbzYbXg6fYpFVrMr3t", "question": "The person here is able to be propelled how?", "choices": ["sail", "motor", "no way", "oars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000101971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 244113, "question_id": "GBQuhunrqMcWxQ3kK7G3Xt", "question": "What action is this animal most known for?", "choices": ["pouncing", "goring", "flying", "swimming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000244113.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276355, "question_id": "GCDc7QYuHLrzvZXDUzGp3n", "question": "Which tournament is being played here?", "choices": ["america's cup", "pan-am games", "wimbledon", "australian open"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276355.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189840, "question_id": "GCPBii84NevNkQkCxKRvYj", "question": "What part of the sign was created after the rest?", "choices": ["red", "war", "pole", "stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189840.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144121, "question_id": "GDESwUgr39yVzbt4oM9hEd", "question": "What material is protecting the tables the flowers and plants are placed on?", "choices": ["tarpaulin", "paper", "mylar", "plastic sheeting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000144121.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447486, "question_id": "GDVfu3wazEaZDYFSHnKMyq", "question": "Why are the horses here?", "choices": ["feeding", "lost", "resting", "mating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447486.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293965, "question_id": "GE5uwQ29mrmboWPkzThvEG", "question": "This type of hairstyle would be most appropriate on which one of these professions?", "choices": ["politician", "doctor", "musician", "minister"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293965.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 340111, "question_id": "GEC6wSnLMkQxzaPCZcAvdu", "question": "What type of food is being cooked?", "choices": ["cheeseburger", "steak", "fish", "hot dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000340111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281639, "question_id": "GELZEeri7LuC6Nu5gUkMyM", "question": "How many times have these items likely been used?", "choices": ["none", "fifty", "three", "sixteen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281639.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395153, "question_id": "GEctDwpnEgmLMkw7qcR8Fp", "question": "What stores energy for this type of device?", "choices": ["battery", "solar panels", "gas cans", "windmills"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395153.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2059, "question_id": "GEjgMxEHAgVdSb2Yf5PBhj", "question": "What type of act is the bear wearing leather straps portrayed as being engaged in?", "choices": ["horse riding", "wrestling", "archery", "bdsm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384876, "question_id": "GFSYSyLSPGnqrbQUPBErUW", "question": "What time is shown on the clock?", "choices": ["530", "midnight", "400", "noon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384876.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129261, "question_id": "GFrfGWU3kfXLpjRHWNHGqs", "question": "What part of this fruit would need to be removed to best enjoy it after ripening?", "choices": ["peel", "center", "stalk", "seeds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125300, "question_id": "GFtmTsE6pgWoBEwh6hUWRy", "question": "What value material is seen here?", "choices": ["gold", "opal", "ivory", "diamond"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457051, "question_id": "GH9w2eYibB833g4EHd3Yht", "question": "Which one of the grand slam tournaments is this?", "choices": ["french", "u.s", "wimbledon", "australian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545886, "question_id": "GHTwqwB2dPLftHVxtDZ2FS", "question": "Where is this sink located?", "choices": ["hospital", "store", "business", "residence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545886.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381768, "question_id": "GHe4rwMPq8oMmh2S5YUSiz", "question": "What is the cat using the shoe as here?", "choices": ["blanket", "stool", "pillow", "brush"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64525, "question_id": "GJCP9gPrYJZie9ySoKgrD6", "question": "What is next to the animal?", "choices": ["television", "laptop", "wrench", "book"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000064525.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338170, "question_id": "GJmtzkY4pMy2C2pLZqWLN5", "question": "What is the person hovering over?", "choices": ["steps", "car", "river", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000338170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391472, "question_id": "GLWZ5XddSYXsDBmGqkzJPZ", "question": "What is one the end of the legs of these creatures?", "choices": ["feet", "paws", "hooves", "flippers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391472.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413102, "question_id": "GMddtdUCVfPAfopqSQfF7H", "question": "This hose traveled there on what type of vehicle?", "choices": ["garden truck", "fire truck", "police car", "ambulance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413102.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231678, "question_id": "GMh4zEiEs8HgekyRnETvg8", "question": "Which one is a domesticated one-toed hoofed mammal?", "choices": ["horse", "dog", "cat", "cow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231678.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17757, "question_id": "GNwnFa8ey6Nw4VLx6mWm4a", "question": "What is facing the couch?", "choices": ["curio cabinet", "fridge", "bed", "television"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482640, "question_id": "GPnhfCCAxbywsnfjE4MMNr", "question": "What sport is the man playing on the TV?", "choices": ["hockey", "basketball", "football", "soccer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428978, "question_id": "GQBkhcrG53tuy6sqrYLcQx", "question": "How are the flying objects being controlled?", "choices": ["remote", "string", "computer", "battery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428978.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86251, "question_id": "GQNS7xV2RhVP4eMdeGBkzY", "question": "What is the food sitting on top of?", "choices": ["counter", "plate", "cutting board", "stove"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086251.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574603, "question_id": "GQTKKQiyLgZGLm8WdtqojX", "question": "What venue are the animals in?", "choices": ["horse barn", "circus barn", "park", "zoo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000574603.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401933, "question_id": "GQs8vgnfc8NWQJe4P7mAcZ", "question": "What material is the floor made of?", "choices": ["ceramic", "metal", "cement", "porcelain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235487, "question_id": "GRameMGDYX6EsR2S4kkc2e", "question": "What is the person doing with the white electronic device?", "choices": ["gaming", "cleaning", "brushing teeth", "massaging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235487.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75329, "question_id": "GSBZpQuGUyLiQykwbjkmxh", "question": "The birds legs allows them to stand in what to seek food?", "choices": ["snow", "sand", "grass", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 455605, "question_id": "GSDUSn6zdKKbnyLjbvxM82", "question": "What type of computer is shown on the bed?", "choices": ["mainframe", "in room", "business", "laptop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000455605.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 282608, "question_id": "GSiKacxbTxUBTGwsjA97ta", "question": "Why is there water around his head?", "choices": ["shook-it-off", "board splashed", "big wave", "raining"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000282608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321520, "question_id": "GSxsNa5W9DHRumKKdJLJV3", "question": "How many cats are eating fish?", "choices": ["two", "five", "three", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178454, "question_id": "GTBCnTbV3Fvx7Tx958CQeN", "question": "What is the bird standing on?", "choices": ["bench", "couch", "perch", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178454.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 220804, "question_id": "GTSjgxmhBWqF49BAEviGeK", "question": "What is the job of this horse?", "choices": ["carry", "push", "pull", "race"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000220804.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38826, "question_id": "GVSm2FDkGCwR8mq5LKxwvt", "question": "Why is the board under the boarder?", "choices": ["falling", "showing off", "confused", "slipped"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000038826.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183518, "question_id": "GVaCQeahbxhHZCNYwTBmGh", "question": "What is the smaller giraffe?", "choices": ["adult", "father", "child", "mother"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183518.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349680, "question_id": "GWSqG4Pgm7LovMtM7n8Nma", "question": "Where is the top part of this decoration found in the human body?", "choices": ["legs", "head", "stomach", "arms"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349680.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121391, "question_id": "GWfkr2axrmmK3gFVuufcgs", "question": "What is painted on the wall?", "choices": ["asteroid", "flames", "stars", "moon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121391.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194649, "question_id": "GWjyMw2rAebyDTZwkcqUSG", "question": "What is the person holding in their hands?", "choices": ["bats", "canes", "racquets", "poles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000194649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386477, "question_id": "GXCZDYV2SW8Ha7FJDi26Dq", "question": "What is the profession of the person who arranged this decor?", "choices": ["teacher", "pilot", "florist", "driver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386477.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224072, "question_id": "GXTHJjpnQadZvFrRQj2M27", "question": "What shape is the birdbath?", "choices": ["rectangle", "square", "triangle", "circle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122340, "question_id": "GYbAcvm3NCCz4DkxZFzGNY", "question": "What vehicle is the person riding?", "choices": ["jet ski", "oil barrel", "plywood sheet", "surf board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484526, "question_id": "GYnbCMHzKq5GNECZYBEppp", "question": "What is the main ingredient in this dish?", "choices": ["toast", "lettuce", "asparagus", "eggplant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000484526.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368341, "question_id": "GapCBWMKt5PeMyGSKLRKP2", "question": "How many seconds until a similar clock with a grandfatherly name might chime two?", "choices": ["300", "seven", "three", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368341.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444785, "question_id": "GaxCTTFrGu3SwmBH4bBnuN", "question": "What type of trailer is shown?", "choices": ["class", "fifth wheel", "class c", "class b"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444785.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 203991, "question_id": "Gbs35G52r5HFzMqTaQj4R2", "question": "The man is doing what?", "choices": ["bending over", "eating", "sleeping", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000203991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 380796, "question_id": "Gc6kSyrn5jsxFxLCzW2WMv", "question": "What time of day would the round circles be most useful?", "choices": ["night", "mid-morning", "noon", "afternoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000380796.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434601, "question_id": "GcJeRYaA3vyQyCzZbpjnJx", "question": "What is the activity of the person seen in the mirror?", "choices": ["running", "driving", "playing", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207700, "question_id": "GcMa2WweEjLfnLagaZhD9n", "question": "What might the area used by stuffed animals be used by the individual who plays with them?", "choices": ["selling", "working", "eating", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207700.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197761, "question_id": "GdAfKjBvwEgS4WAdN28huo", "question": "What country is the flag above represent?", "choices": ["chile", "canada", "france", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197761.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175900, "question_id": "GdaUQhhPDPfFYLpN4aNjzu", "question": "What kind of bottle is on the desk?", "choices": ["wine", "medicine", "decorative", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175900.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577427, "question_id": "Gh55DAwjQSQ9Jpz4vjeEt4", "question": "What are these zebras ready to do?", "choices": ["sleep", "drink", "eat", "play"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419490, "question_id": "Gh6ajAqXPmtaEvcinVpr4v", "question": "Which government owns this railway?", "choices": ["danish", "dutch", "brazilian", "italian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000419490.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79524, "question_id": "GhBhpDPrCFws7H3ooRzjmb", "question": "What is this dog trying to do?", "choices": ["attack", "eat", "hide", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505298, "question_id": "GhTiZhgaZzEyUzAPTBmBxY", "question": "What type of animal is seen behind the zebras?", "choices": ["hippo", "cow", "chicken", "pig"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505298.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103803, "question_id": "Ghi9K8RJVNFrj3gN4FXAyG", "question": "This model of train was first built before which year?", "choices": ["1860", "1620", "1940", "1820"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000103803.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495566, "question_id": "GjwGc26aobzx5cgyLBj6M6", "question": "What kind of vehicle would be most common in this neighborhood?", "choices": ["sherman tank", "camel", "b-52", "rolls royce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569260, "question_id": "GmWrvsAPZkq7Mmz3wQMqjU", "question": "The bird here seeks food from where?", "choices": ["rocks", "water", "trees", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569260.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136854, "question_id": "Gn6y2HxwGkbHrUugYdJ4Vx", "question": "What is a similar language to the original language the signs are in?", "choices": ["chinese", "polish", "german", "spanish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554492, "question_id": "GniAUurQiXTuXMiXtfGcJZ", "question": "What position are the blinds in?", "choices": ["partial", "open", "closed", "removed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000554492.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 425299, "question_id": "GpVc3tVTZYevjjfphrqbbo", "question": "Which one of the following animals sometimes preys on these?", "choices": ["bald eagle", "cougars", "bison", "hyenas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000425299.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339551, "question_id": "GpggsKPdDmyd6TcJWhNV7N", "question": "What is the dog trying to do?", "choices": ["get away", "grab box", "fix bed", "find pizza"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153651, "question_id": "Gps3YskHbrEGXbTC6SHE85", "question": "What is required for this activity?", "choices": ["sun", "rain", "wind", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153651.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555716, "question_id": "GqByXYiW6gTYw5xom8FMR5", "question": "What is the name of the sport object being played?", "choices": ["frisbee", "flying ring", "jai alai", "lacrosse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555716.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391649, "question_id": "GqJRHBBryeUTPLSf5C7o78", "question": "What is closest to the lever on the toilet?", "choices": ["plunger", "beanie", "wallet", "sink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517550, "question_id": "GqKmmZTVEZQ2yX2PiJQMXV", "question": "What is near the hydrant?", "choices": ["car", "water", "fire", "firefighter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75951, "question_id": "GqXrApYRAaXmdVNwWuPPxx", "question": "What is upside down in the mud?", "choices": ["banana", "hat", "plastic bottle", "shoe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075951.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555330, "question_id": "GrJupkzYWfKF9ZmVaszT9o", "question": "What is the name for the black bird in the snow?", "choices": ["crow", "pigeon", "dove", "parrot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555330.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124114, "question_id": "GrbjWGHUimQSSKKGcCEA8e", "question": "What is mojo surf?", "choices": ["large waves", "board name", "girl's name", "beach's name"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000124114.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 340660, "question_id": "GrmUEN3T7jAHNXUnyWRTAC", "question": "What time of day is this?", "choices": ["evening", "noon", "early morning", "late night"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000340660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32096, "question_id": "Gt3L9eqW6jKnqUAUSv2pa9", "question": "What color is the root vegetable shown here?", "choices": ["purple", "red", "blue", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032096.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317285, "question_id": "GtX3mSpF3rJkSGsJ5xRKRW", "question": "The basket shaped item most likely contains what?", "choices": ["seeds", "bark", "crayons", "milk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90286, "question_id": "GtXxfyQqUn5YnX7if2vyUN", "question": "What does the bear seem to be doing in the dirt?", "choices": ["mating", "resting", "eating", "playing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54454, "question_id": "Gtrrx4hoFDTNyh6EKFTE5g", "question": "Based on the lines on the bread what type of appliance was this cooked on?", "choices": ["skillet", "crock pot", "grill", "oven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054454.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51112, "question_id": "GtsAZfSnd5dk6pVDuLVn4p", "question": "Who made this vase?", "choices": ["carpenter", "painter", "glass smith", "potter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051112.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111464, "question_id": "GtuUpBZBbUnouyfkYzYUAo", "question": "What are the white structures in the background harnessing?", "choices": ["water", "coal", "wind", "sunlight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19014, "question_id": "Gu9dtRRcYpnSyXdHQ6Hzke", "question": "What brand of shoes are on the ground?", "choices": ["nike", "converse", "adidas", "vans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019014.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279702, "question_id": "Guqnnzx8oWZyyW9xHuyvHJ", "question": "What type of animals are shown?", "choices": ["tiger", "lion", "giraffe", "gorilla"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000279702.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133888, "question_id": "Gw55rTKm7PmfES8H5CU3zj", "question": "These items symbolize a popular what?", "choices": ["song", "movie", "game", "city"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133888.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414841, "question_id": "GwcS3C29KrmDbr77fKX5RF", "question": "What is the number on the garbage can?", "choices": ["35", "22", "99", "57"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414841.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125194, "question_id": "GxCghkZFmKN9EPxRPipP2G", "question": "What position is the person?", "choices": ["laying down", "standing tall", "upside down", "bending"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315170, "question_id": "GxEFUx2kpo2oQP7eiFcuYM", "question": "What is the bird using the puddle for?", "choices": ["food", "predators", "bugs", "bath"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168860, "question_id": "GxQMcoxo8kys3unf3ThWxt", "question": "The characters are written in what script?", "choices": ["thai", "korean", "japanese", "chinese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168860.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493492, "question_id": "Gy5nfMEMhVM57wjWaEFCne", "question": "What is the bear doing that's making him soaking wet?", "choices": ["bathing", "fishing", "exercise", "playing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493492.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559917, "question_id": "GyHegrz5jh9YpiAW6viyXM", "question": "What position is the man in?", "choices": ["jumping", "hand stand", "crouching", "upside down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559917.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471485, "question_id": "GyWH8RPKFnpHUieqzD5B92", "question": "What is the man in the doorway doing?", "choices": ["sneaking", "painting", "gaming", "taking photo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000471485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384279, "question_id": "H2emmghne7V5dfQeNJUcix", "question": "What is the man risking by cutting his hair by himself?", "choices": ["life", "health", "looks", "nothing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000384279.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226723, "question_id": "H2vZuhv6uL7UsTxysiaDRh", "question": "Who is this treat meant for?", "choices": ["boss", "dog", "child", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516099, "question_id": "H3TKxanrxrXAJbBa27gFxa", "question": "How would an British person spell the item the man has his hand on?", "choices": ["box", "theatre", "armour", "racquet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516099.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299701, "question_id": "H4VFpyodFxTubzkWmCqh3T", "question": "What powers this train?", "choices": ["battery", "gas", "diesel", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299701.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21823, "question_id": "H4iJG3PSEbwQeBGHA5Spc5", "question": "Whats the best place for the dog to go if it rains?", "choices": ["bike", "crate", "window", "wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536115, "question_id": "H6BkU7i2Urop4grS2pE9ax", "question": "What is the age group the elephant on the left is?", "choices": ["ancient", "elderly", "child", "adult"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536115.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328556, "question_id": "H7CuYzW79P7gL5eYikiu5H", "question": "What type of location is this bird located in?", "choices": ["desert", "coastal", "arctic", "jungle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328556.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155646, "question_id": "H8Jbjteo4SZXwusGxrXfZQ", "question": "What is the cat drinking out of?", "choices": ["bottle", "bowl", "mug", "tray"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63291, "question_id": "H8UqDBgLY3zuZAbK7Tgd9f", "question": "Why ist here a tag near the cow's ear?", "choices": ["tracking device", "identify it", "for sale", "alarm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063291.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574306, "question_id": "H8f5y5FnAztci9vik9xUXs", "question": "What are men's bathing suits mostly made of?", "choices": ["spandex", "cotton", "wool", "polyester"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000574306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121138, "question_id": "H8nUVQCVuqFmeZay35rybH", "question": "What went wrong with this mans play?", "choices": ["dropped racquet", "missed ball", "slipped", "fell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243790, "question_id": "H9E7M8cuuA2YwYpMuUbRNd", "question": "What is painted permanently on the mirror seen here?", "choices": ["window frame", "social", "persons image", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581255, "question_id": "HA3qQjNb3Wfv4btqKmPWRG", "question": "Where is this cat located?", "choices": ["house", "veterinarian", "office", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581255.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93514, "question_id": "HBaKn6RqeRZS9Fr2MLe5Pj", "question": "How would you describe this skate environment?", "choices": ["vert", "downhill", "bowl skating", "urban"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000093514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578275, "question_id": "HBmTR2LhDWySjcBSfae9s4", "question": "What is sitting on top of the laptop keyboard and mouse?", "choices": ["hand", "remote", "food", "bottle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578275.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506423, "question_id": "HBoHb2zvrjKbwUy7DRSbwB", "question": "What type of eater are these creatures?", "choices": ["photosynthesis", "omnivores", "carnivores", "herbivores"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579928, "question_id": "HCzJZqZsNqZ73YArrkT4mW", "question": "Who was famous for riding one of these animals?", "choices": ["lady godiva", "stephen hawking", "stevie wonder", "rip torn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579928.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249191, "question_id": "HDxzATw9z5S52Jvg5kUmbc", "question": "How many people could approximately fit on the structure closest to the camera?", "choices": ["eight", "five", "six", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000249191.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 94634, "question_id": "HEJpdoRvCGDzFozDgY5Pt9", "question": "Based on the fabric and the context what are the pet and remote on?", "choices": ["table", "couch", "bed", "carpet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000094634.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517633, "question_id": "HERjWhnUfQ6mbqwYM5K8cD", "question": "What is this animal trying to do?", "choices": ["run", "roll", "drink", "hide"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517633.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232057, "question_id": "HEV66SEpN8CqeVsrHq3tZH", "question": "What are the atmospheric conditions here at this time?", "choices": ["calm", "hurricane", "monsoon", "windy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547019, "question_id": "HFt8tSHe8mbc2HFX6VgWAU", "question": "What would this road in the middle of the desert best be used for?", "choices": ["stores", "safari", "traveling", "industry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547019.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27169, "question_id": "HFtoCNSbMfqnqo6c8b4crp", "question": "What shape is on the jacket?", "choices": ["heart", "circle", "triangle", "line"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000027169.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29239, "question_id": "HGWqey99B6GxwzCzUJCrxW", "question": "What surface are these bags located on?", "choices": ["ground", "table", "lap", "belt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029239.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 469511, "question_id": "HGz4UrAGN9vWMmtZ7yv2a6", "question": "What is the name of a vegetable that is the same colour as the clock on the wall?", "choices": ["pumpkin", "onion", "cabbage", "aubergine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000469511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257802, "question_id": "HH6bVehiug6J8UZNmiqo9A", "question": "What dessert is in the baggy?", "choices": ["cookies", "muffins", "donuts", "cannoli"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540058, "question_id": "HJG26896iinwrRNxTGWvFf", "question": "What is the giraffe eating from?", "choices": ["lake", "tree", "bowl", "basket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540058.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368059, "question_id": "HJZBYodd93rBvCvchxFKDi", "question": "What object would allow the skate boarder to do this type of air trick?", "choices": ["ramp", "stair case", "ledge", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465561, "question_id": "HJu8NC8ojTUtruDYU2isQN", "question": "What kind of communication is displayed here?", "choices": ["warning", "mandate", "psa", "advertisement"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465561.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37803, "question_id": "HJxAmjSgEm4NUghcQ8uYuj", "question": "The floor is made out of what kind of material?", "choices": ["carpet", "stone tile", "laminated wood", "acrylic tile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037803.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271816, "question_id": "HKGvQfGoNKWUUtCZiMSJeF", "question": "What is unusual about how he is presenting himself?", "choices": ["pose", "makeup", "tie", "suit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271816.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11752, "question_id": "HLLBJMY9tYiadvS6Chhoqa", "question": "What vegetable has been placed on this sandwich?", "choices": ["tomato", "lettuce", "spinach", "zucchini"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000011752.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82527, "question_id": "HLvAEXkeqR4mbXDMYHo4E5", "question": "What would be a good location to do the activity the woman is doing?", "choices": ["dead sea", "arizona desert", "egypt", "siberia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082527.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81241, "question_id": "HMJAH4WHnVfBhEp2GD2Aet", "question": "What is the woman doing while on the bed?", "choices": ["eating", "reading book", "ironing clothes", "getting dressed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15808, "question_id": "HPJhwQ39Z3pRhFSLLH9D7r", "question": "What is this animal often shown to like eating?", "choices": ["bananas", "foam", "peanuts", "mice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015808.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243348, "question_id": "HPPmNJTYuJiZuudYo9Q53U", "question": "What visible item might one get driest by using here?", "choices": ["sheet", "towel", "house coat", "mattress"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393197, "question_id": "HPSqbV2UVQ2wwrqyXyfhLf", "question": "What is the cat doing?", "choices": ["hiding", "typing", "grooming", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393197.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211077, "question_id": "HPaW9ZCk8rfxj7kYDW2Pjj", "question": "What are the thin black objects on the window used for?", "choices": ["painting", "directing", "signaling", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211077.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396616, "question_id": "HQNT58xzqE86bkFXp947Et", "question": "What information does the lighted object provide?", "choices": ["time", "location", "speed", "temperature"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396616.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330963, "question_id": "HQrosh7gSeiWyQFpMvgSWg", "question": "The woman holding the wine glass with spiderwebs on its base has dressed up for which holiday?", "choices": ["labor day", "halloween", "christmas", "new year's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195274, "question_id": "HRHdxwwenoWvx9LSKuxfRT", "question": "What specific kind of an animal is this?", "choices": ["sphynx cat", "dalmatian dog", "bluejay", "panda bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195274.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130782, "question_id": "HSUzPpvhJZJM5EtrdMvGWd", "question": "Which group of people invented skateboards?", "choices": ["scientists", "bankers", "surfers", "dancers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130782.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473894, "question_id": "HTUwUbrGpJVHYTBvrGpYqc", "question": "The woman on the bed enjoys what type of hobby?", "choices": ["cooking", "watching television", "reading", "knitting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 28023, "question_id": "HTfuQWsRyfKg7TLwPvVtjV", "question": "The clock hands are at?", "choices": ["126", "200", "100", "1246"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000028023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418452, "question_id": "HUUpuZeCGFBzMrY4ryviwt", "question": "What helps the person with the toy here keep it without fully losing it?", "choices": ["beeper", "string", "nothing", "drone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37000, "question_id": "HV7oitFNoYbYyMeK8PKLsk", "question": "What is the English name of the sixth city mentioned?", "choices": ["washington", "walton", "warsaw", "warwick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037000.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171972, "question_id": "HVFNNEpmM8gr6b9f9iYzwX", "question": "What is allowing the sun to put a glare on the computer screen?", "choices": ["blinds", "cord", "curtains", "mug"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171972.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25617, "question_id": "HWHzuEXUVcpXj6c7QutGad", "question": "What musical instrument is implied by the logo?", "choices": ["trumpet", "violin", "piano", "harp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025617.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297341, "question_id": "HWNVLNbTtSXMWRY2TM4nUL", "question": "What is this baby learning to do?", "choices": ["crawl", "brush", "drink", "walk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297341.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83182, "question_id": "HWU9wu5mqdcMTrLkaDhWNt", "question": "In which motion does this individual move?", "choices": ["up down", "sideways", "arc", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083182.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536378, "question_id": "HWfZrPMRGgbmCGcghfL2Da", "question": "What type of trick is the snowboarder pulling off?", "choices": ["stall", "grab", "flip", "spin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536378.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84907, "question_id": "HXXazr7U9RpsaihZiKNAx2", "question": "In which part of Europe is this highway located?", "choices": ["iberia", "eastern europe", "mediterranean", "scandinavia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465727, "question_id": "HXkTWt7zSDT2cDLXuCfBpa", "question": "How did the younger bear get through the fence?", "choices": ["is smaller", "is braver", "is smarter", "is lost"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000465727.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8500, "question_id": "Ha9Z9tKswgzKNHBXuX7zr8", "question": "What job position does the person here most likely hold?", "choices": ["sheriff", "shepherd", "marshall", "goatherd"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008500.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462318, "question_id": "HbQo6XK38diyFpzhnCeQ49", "question": "What is the location of this restaurant?", "choices": ["usa", "france", "england", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354510, "question_id": "HdcYsj5zgwCACzAdtWpoCu", "question": "This road signs indicates what sides has to keep?", "choices": ["middle", "none", "right", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354510.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532894, "question_id": "Hdh5zV9LE8WhGfbxkR8Wji", "question": "The Belted Galloway in the front is sometimes called by what cookie name?", "choices": ["samoa", "fudge", "sugar", "oreo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151427, "question_id": "HepGWViuii2jcYynLdkEFe", "question": "What is the major theme that was created for the clock?", "choices": ["royalty", "poseidon", "zeus", "dragon"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000151427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4726, "question_id": "HfmHMhamyN2Tz5j8wpo6yv", "question": "What type of store carries this item?", "choices": ["florist", "salon", "bakery", "gym"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475793, "question_id": "HhbQr2DmET9UMMpa3yLV95", "question": "What are the two bumps on the animals head called?", "choices": ["ossicones", "tendons", "antlers", "ohmmeters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000475793.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477558, "question_id": "HhrWUabP39hjNQpD3p5fNb", "question": "What state name is pronounced like like the word that appears in front of street on the sign?", "choices": ["alabama", "new jersey", "utah", "tennessee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477558.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22254, "question_id": "Hi7FHZSb7goJL9wGR26ZoU", "question": "What type of energy is consumed by the object emitting light?", "choices": ["electrical", "heat", "gaseous", "potential"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022254.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499919, "question_id": "HiJG9V7LzTAso2JM5wcjPR", "question": "What is in the mason jar?", "choices": ["trees", "peppers", "sand", "apples"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499919.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396991, "question_id": "HjHkWhQZwq2Y26jdPdLfpJ", "question": "What is the white substance which covers the rocks?", "choices": ["calcite", "quartz", "guano", "limestone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 388573, "question_id": "HjmsBgxJZb2nsjmcTSBDY6", "question": "How does the food taste?", "choices": ["great", "can't tell", "terrible", "okay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000388573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504904, "question_id": "Hjr63tMDZbcbyNTc6XGGHm", "question": "Why is he wearing a suit?", "choices": ["uniform", "costume", "warmth", "meeting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19583, "question_id": "Hjs3hM8JAoCrqdqZdCR7PB", "question": "How many person can be seen in the reflection?", "choices": ["five", "six", "three", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35688, "question_id": "Hju6XxfqzuBdGyFmCaJSos", "question": "What is this type of seating called?", "choices": ["sofa", "bean bag", "recliner", "stool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035688.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510348, "question_id": "HkrRpPgjYEWR6BL3DKJdxy", "question": "What would this animal choose to eat?", "choices": ["whale", "soda can", "honey", "shoe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 88319, "question_id": "Hm7C2LZtc4BHJP6A6m33fF", "question": "What does the R.I. stand for?", "choices": ["robotics institute", "rhode island", "rotary international", "regina imperatrix"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000088319.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51996, "question_id": "Hmuc4SdB3QgwZDmmLYeUGF", "question": "This cake would be perfect to display during what season?", "choices": ["autumn", "spring", "winter", "summer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457949, "question_id": "HmxsQVEYrD4gNysPu74aot", "question": "Where will the truck take those cars?", "choices": ["repair shop", "police station", "rodeo", "dealership"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457949.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575118, "question_id": "HnbtykHbAEnw2Cpn8PbB3p", "question": "What is this kind of transport?", "choices": ["road", "railway", "air", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29440, "question_id": "Hq69gUzgEPSbHNr7En2KtA", "question": "What is flying in the air?", "choices": ["kite", "helicopter", "bird", "airplane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029440.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80681, "question_id": "HqAnHwnL8zUKWKd6RrwSjx", "question": "What damaged the leaves?", "choices": ["water", "insect", "wind", "poison"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190833, "question_id": "HszxLJkP3vuomA5h8iKJgQ", "question": "What is the skin of this animal like?", "choices": ["slick", "slimy", "cold", "fuzzy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190833.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565638, "question_id": "HuRfPbhS8EGzLjcgUvrYDH", "question": "Who is famous for competing in this sport?", "choices": ["mike trout", "jim cornette", "laird hamilton", "jerry jones"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000565638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362673, "question_id": "HuzZuaGGZGVitu5g3ZJoEK", "question": "What is the dog doing?", "choices": ["biting", "sitting", "running", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362673.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 282536, "question_id": "HvTvzU8qyVUwTuCy3Bz24T", "question": "What type of area is shown?", "choices": ["forest", "dessert", "urban", "rural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000282536.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493554, "question_id": "HvywrxahAZDzydzFnXQcRc", "question": "Which of these could fit between the two trains?", "choices": ["person", "bus", "boat", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493554.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493348, "question_id": "Hw3eiqD3ZgK4cpyNniqGb5", "question": "What is on top of the board?", "choices": ["foot", "cat", "dog", "fishing pole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291581, "question_id": "HwtVVUGUmnfG7eYzoxj8ys", "question": "Why is he most likely smiling?", "choices": ["heard joke", "losing", "sees friend", "winning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453556, "question_id": "HxLFtRUGJmGfBxZdMRjeKg", "question": "Where are these toys?", "choices": ["daycare", "house", "thrift shop", "department store"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000453556.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291392, "question_id": "HyPhuJga2uEEZQBmvwnxUZ", "question": "In what city was this airline founded?", "choices": ["marseille", "frankfort", "budapest", "barcelona"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554993, "question_id": "HzJo8HrJakxQiGjHVW9Gci", "question": "What is the expression of the boy showing in his face?", "choices": ["anger", "laugh", "smile", "warm"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000554993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30199, "question_id": "HzQcjrNSRu4pRdAPLRNeca", "question": "What is this shelter commonly used for?", "choices": ["eating", "bus stop", "cooking", "meeting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030199.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382719, "question_id": "J3oW8tnA5n4mK9jVGJTjfL", "question": "This place is most likely what?", "choices": ["circus", "museum", "jungle", "plains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382719.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281137, "question_id": "J45Ngmwumpbm338pWbYJsK", "question": "How has this food been served?", "choices": ["diced", "sliced", "shaved", "scooped"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281137.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 141202, "question_id": "J47LLCXtD8oGS4mHxkWhdG", "question": "What language are the subtitles?", "choices": ["ukranian", "russian", "polish", "finnish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000141202.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98797, "question_id": "J4DZDiKUy9fMN6oRAJ3Zcb", "question": "What is the man wearing on his head?", "choices": ["fedora", "helmet", "baseball hat", "beanie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098797.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529595, "question_id": "J4Y3KyJ7HjNFYupHCXBzYt", "question": "Who is famous for playing this sport?", "choices": ["simone biles", "eva green", "jens pulver", "maria sharapova"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529595.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302309, "question_id": "J4ftfgJ4tpfeafMKTZhAPD", "question": "What unusual element is associated with the breath of the creature seen on this vase?", "choices": ["water", "wind", "fire", "earth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302309.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570730, "question_id": "J5p4mEviWBnMqM6yXvzEn8", "question": "What activity involves similar motions to what the cat is doing?", "choices": ["yoga", "pumping gas", "eating", "typing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570730.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556354, "question_id": "J5riyiMBnvq27y8dFD55wB", "question": "What is on top of the bus?", "choices": ["snowboard", "surfboard", "canoe", "sled"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299627, "question_id": "J7WTC8BMubfQAmxzV4aKVB", "question": "What part of the city does this building appear to be in?", "choices": ["low end", "middle class", "private community", "high end"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192336, "question_id": "J7sA4bYLiJwt5K8QBQQJSy", "question": "What is the sheep doing?", "choices": ["hiding", "mating", "feeding", "running away"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192336.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107315, "question_id": "J86ijVuGT7Ygy3uRwf7Swy", "question": "What might someone looking in the mirror here pretend that they themselves have?", "choices": ["beards", "cell phone", "mustaches", "bald heads"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448305, "question_id": "J9m874iihmihwsGSKt5mZX", "question": "What sits upon the taller zebras upper head?", "choices": ["ear", "bird", "flying insect", "rodent"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448305.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522761, "question_id": "J9wbh9zk4HmHYHoerxT9V6", "question": "From which plant did the food being eaten here originate?", "choices": ["grass", "oleo", "grocery stores", "dairy cows"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522761.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378315, "question_id": "J9xyi4zxxQjE65KJDjXeG2", "question": "What brand is the cellphone from that the man is holding in his hand?", "choices": ["motorolla", "samsung", "apple", "lg"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518193, "question_id": "JBHE2tXVx32KMxHMdtMZmh", "question": "What region of the world are these toilets most common in?", "choices": ["middle east", "australia", "asia", "west"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518193.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 280285, "question_id": "JBewEr3Jw2NhwbxkYtAR5M", "question": "What are the two people in the forefront a part of?", "choices": ["college alumni", "tennis team", "no relationship", "pilots club"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000280285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373841, "question_id": "JCYyt6CDCydb8JXtkcdge8", "question": "What part of the animal is visible?", "choices": ["ears", "tail", "paw", "gills"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373841.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65340, "question_id": "JCzWGDjVst9QuGh8zXebxm", "question": "In what state is this food?", "choices": ["uncooked", "partially eaten", "completely eaten", "whole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390498, "question_id": "JDjGV4ddmxcuBVugbke839", "question": "Which animal is closest to the gate?", "choices": ["cat", "dog", "pig", "amoeba"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131707, "question_id": "JEHdy97NAUcdbqbhpNSuzi", "question": "Which color indicates fire hydrant temporarily unavailable to use?", "choices": ["black", "red", "purple", "blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000131707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428443, "question_id": "JEsvZL5v4ZoufX7wHhxvWf", "question": "Which part of these animals is most easily formed into yarn?", "choices": ["horns", "hair", "milk", "hooves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536125, "question_id": "JFkanKjR7gMjYLsX8gKD5B", "question": "Which of these objects is most likely in the immediate vicinity of this giraffe?", "choices": ["office building", "stoplight", "stop sign", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536125.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406367, "question_id": "JFnrRXeWiaTPxeQDsbgmVo", "question": "What is the large black device to the right of the cake?", "choices": ["drill", "light", "airplane", "rocket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000406367.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260069, "question_id": "JG4nesDQQhdjR9PF4XLKKe", "question": "What is this type of area called?", "choices": ["desert", "arctic", "urban", "rural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534853, "question_id": "JG6Mjn2NRP6CTcZbKQmT4W", "question": "What type of buildings are located here?", "choices": ["barns", "sheds", "commercial", "homes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111977, "question_id": "JG7P6dXaXeFeX3A2VFrwki", "question": "What place is known for making these food items?", "choices": ["chipotle", "dunkin donuts", "subway", "taco bell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534709, "question_id": "JGKahmFEvrmwWKMh7sXgUG", "question": "What is the primary use of the surface the bear is on?", "choices": ["logging", "farming", "camping", "transportation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362417, "question_id": "JJGAFFrdvvykpwsWspKJfn", "question": "What city is on the sticker?", "choices": ["atlanta", "boston", "ny", "washington"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362417.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 509865, "question_id": "JKKzCWa32CyGxz2AbD8qrX", "question": "Why are the giraffes crowded around this area?", "choices": ["to drink", "to eat", "to communicate", "to sing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000509865.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263713, "question_id": "JL6w52E4dWyz9NPDhpfA26", "question": "What type of station or work space is found in the round area?", "choices": ["cubicle", "information reception", "work release", "human trafficking"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000263713.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413798, "question_id": "JLcintVpMHmZvrFqpT7rAM", "question": "What size pizza would this be called?", "choices": ["extra large", "medium", "large", "personal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424934, "question_id": "JMy4G2VcqAJ2iZv9oDwnZ5", "question": "What is the color of the teddy bear above?", "choices": ["green", "red", "yellow", "pink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424934.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459798, "question_id": "JN7AjrBs8KrW4tkW8pJ3c4", "question": "What is she ready to do?", "choices": ["dunk", "swing", "shoot", "dribble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000459798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344355, "question_id": "JQWbn5L935MVF2gorXCg2V", "question": "Where are these elephants at?", "choices": ["mountains", "zoo", "forest", "desert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344355.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452952, "question_id": "JQdFwbpgWipcetwnYc7BGv", "question": "What sized dog is shown here?", "choices": ["ball park", "petite", "foot long", "vienna mini"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98754, "question_id": "JQeNfshZWxG22Xmvf8PipU", "question": "How many international destinations does this airline fly to?", "choices": ["20", "40", "32", "15"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098754.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 219660, "question_id": "JR7RDopC4ZczmTgHRwMJco", "question": "What structure can be seen?", "choices": ["car park", "trees", "cars", "skyscraper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000219660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107079, "question_id": "JRZKvueQ6xdnPUsmkCv6es", "question": "What position is this bucket in?", "choices": ["upside down", "tipped", "balanced", "upright"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536537, "question_id": "JSX5T33eWvYkFm33BPaht6", "question": "What action did the zebra most likely take immediately take after this photo was taken?", "choices": ["eat", "spin", "jump", "fly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536537.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 113479, "question_id": "JScuDADNfrzev2zQ2VLfrL", "question": "What was the name of the first cloned type of this animal?", "choices": ["dolly", "bessie", "maude", "polly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000113479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508138, "question_id": "JSdFXSxW3biXtu2ZCvZpgR", "question": "Men are seen here doing what forbidden action?", "choices": ["singing", "running", "surfing", "drinking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397932, "question_id": "JVL4ZUc4oRHgpdxuGtLP4Q", "question": "Which clock hand is pointed at the 7?", "choices": ["none", "second", "minute", "hour"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445065, "question_id": "JVr2zYvwAv8NgXRjF5tmT4", "question": "In which direction will this person move?", "choices": ["down", "up", "right", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000445065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427267, "question_id": "JVrDBmvCCmKtqe69hPe2ic", "question": "Logically speaking how many wheels would this vehicle have?", "choices": ["two", "three", "18", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57, "question_id": "JW4AyiNxrfE9oqGDtXqiJo", "question": "What has disturbed the earth?", "choices": ["tennis racket", "broom", "his feet", "rain"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000000057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472638, "question_id": "JWGJ5tkLCJxrnqdr8xP7jk", "question": "What type of transportation is shown?", "choices": ["water", "air", "rail", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424892, "question_id": "JWL4HTgWin5khP2HAYezRo", "question": "What does this tool do?", "choices": ["vacuum", "paint", "plunge", "wash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92162, "question_id": "JXfAUHpxxdqSbW8odTztWu", "question": "Why is the access to the feed box so high up?", "choices": ["for giraffes", "prevent children", "deter zebras", "prevent theft"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536409, "question_id": "JY5uMftMkZYWBi9svotMpp", "question": "What most likely happened to the tree the bear is sitting on?", "choices": ["it fell", "it vaporized", "it petrified", "it burned"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536409.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 426211, "question_id": "JZ6atKBZpWToxSEAWDM8GG", "question": "What kind of food is near the leaves?", "choices": ["meat", "fish", "fruit", "cheese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000426211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 405339, "question_id": "JZLp9obYJuYm8doxjxVDVy", "question": "At least how many people will share this pie?", "choices": ["two", "one", "five", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000405339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504509, "question_id": "JaBWCXApjuJQLuNb6xTRWh", "question": "What happened to this train?", "choices": ["engine failed", "crashed", "derailed", "exploded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504509.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76435, "question_id": "JaZjKtWTDv7wsMtYcrELxf", "question": "What beverage is being enjoyed with the pizza?", "choices": ["beer", "soda", "water", "wine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 531505, "question_id": "JakZbgJgZuwZFyQL8WpJKX", "question": "What is on the towel?", "choices": ["cat", "toothbrush", "baby", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000531505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412893, "question_id": "JbECXFDYhuKaMmz5eFTBMW", "question": "The item in the bottle will do what?", "choices": ["wash hands", "flavor food", "quench thirst", "clean table"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000412893.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484834, "question_id": "Jbmnwv5UynshhsZaninstv", "question": "Why is the boy wearing a glove?", "choices": ["warmth", "cleaning", "fashion", "catching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000484834.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363897, "question_id": "JcEdCvgxJUaABz2jSQ5ir6", "question": "What is this woman likely drinking?", "choices": ["soda", "water", "beer", "juice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363897.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289436, "question_id": "JcGvbaJYuiGnPMAsiHckm4", "question": "What animal is usually around the green item?", "choices": ["polar bear", "seal", "bee", "shark"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289436.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157583, "question_id": "Jd3zEedUgzTN7LDDB2AtjF", "question": "What type of sink is in this bathroom?", "choices": ["corner sink", "vessel sink", "pedestal sink", "kitchen sink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540638, "question_id": "Jdew3xPpEUwWHVreAyepya", "question": "What's the design on the floor called?", "choices": ["checkerboard", "spiral", "domestic", "layered"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 42392, "question_id": "JfGQitixmiNFaRLcAifMV8", "question": "What type of information is available on the building?", "choices": ["direction", "time", "speed", "weather"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000042392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265369, "question_id": "Jfp7AREt5qg4cDAWejZKYz", "question": "What element was the roofing seen here formed from?", "choices": ["copper", "wood", "lead", "tin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265369.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577824, "question_id": "Jj3eLAeA8m9EWjXhkc3d4k", "question": "What is the most common Frisbee throw?", "choices": ["front hand", "shorthand", "long hand", "backhand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577824.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477991, "question_id": "JjDjVkY5qH7J7wX2SpZR2w", "question": "This Christmas themed parade float is on display in which state?", "choices": ["california", "arizona", "new york", "florida"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235435, "question_id": "JjPDJJydMjHVdYHAGLxg8o", "question": "Why is her hand a blur?", "choices": ["light", "speed", "photo manipulation", "faulty equipment"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9662, "question_id": "JokS5PCe5hE8ktxyGudDrw", "question": "Why is the rail in front of the train shiny?", "choices": ["train traffic", "new construction", "recently cleaned", "corrosion"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000009662.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432536, "question_id": "JpEBbNZEPorfu2cxETrnEW", "question": "A road vehicle designed to carry many passengers is called?", "choices": ["train", "truck", "bus", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432536.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 383170, "question_id": "JqKqm4xFVKBUctnGhhyjcU", "question": "When this cow's around what might one hear?", "choices": ["trumpets", "screams", "bell", "harps"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000383170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137601, "question_id": "JqPZG6TAeE3oVksc8KKcJv", "question": "What type of transportation is shown?", "choices": ["air", "rail", "road", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557046, "question_id": "JrsnxGs6bksAEsnwkZKkUd", "question": "How many slices of bread were used here?", "choices": ["one", "three", "four", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467666, "question_id": "JsBqbbQX8LvK2wLyrrDb45", "question": "What is the man on the board doing on the yellow object?", "choices": ["flipping", "painting", "sitting", "sliding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467666.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517364, "question_id": "JseerijQNT2xkNytwntFpX", "question": "Why is the sheep lying in the road?", "choices": ["posing", "block vehicles", "resting", "injured"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25013, "question_id": "Jt6kkvXreEaiP3DrgmdB86", "question": "What are the baby bears called?", "choices": ["kid", "puppy", "cub", "kitten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537006, "question_id": "JuDbamVWvsFANJ9BgvBNzA", "question": "The animal on the left is known as king of the what?", "choices": ["serengeti", "jungle", "savannah", "forest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537006.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451366, "question_id": "JuVSUYGsPcd6SnRvwimoPb", "question": "Why is she licking the green thing?", "choices": ["hungry", "is punishment", "is food", "found it"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000451366.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30199, "question_id": "JudiB84joqZMjPyUTJST4Y", "question": "What is the same color as the dots?", "choices": ["lemon", "cherry", "watermelon", "orange"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030199.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406975, "question_id": "JvkXmZmSTvdbAKtqo8x7Uw", "question": "What is the dog doing?", "choices": ["hiding", "stalking cat", "typing", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000406975.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503586, "question_id": "JwMBvpSsDdHy89GN86aoiL", "question": "In what kind of setting is the plane landing?", "choices": ["desert", "arctic", "rural", "urban"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503586.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555558, "question_id": "JwR5FZq8yz3hhNWU42mWJp", "question": "This toilet is probably located in what kind of public place?", "choices": ["pharmacy", "supermarket", "bar", "art gallery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555558.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118635, "question_id": "JxbWrYydxQ2XKia6a9G6KA", "question": "What part of this animal helps it defend itself?", "choices": ["stinger", "fists", "horns", "pouch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118635.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 348191, "question_id": "JxjUzrVhsanSeD9gTD7Bej", "question": "What might have caused the photo to be out of focus?", "choices": ["earthquake", "wind", "lighting", "motion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000348191.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475662, "question_id": "JxzAcqXW2pUuozVeV389bQ", "question": "What do the signs indicate?", "choices": ["crosswalk", "selling children", "missing children", "children crossing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000475662.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565768, "question_id": "Jy3sbGzkAjUaaygsmjDRy4", "question": "If someone wanted to store their ice cream preventing it's melting which part of the refrigerator would they open to place it inside?", "choices": ["right", "bottom", "top", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 331628, "question_id": "JyRhkeU5Ri3jnQX8uuFC4Q", "question": "What is usually found in this room?", "choices": ["pool table", "television", "book shelf", "shower curtain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000331628.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505257, "question_id": "JyZh3BJqcEXsvNgd3ZJsgB", "question": "Which of these people would most resemble the pictured animals?", "choices": ["celebrity", "janitor", "referee", "clerk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209999, "question_id": "Jz6dpCYHRu5DCFaNqHeoVh", "question": "What is required for this activity?", "choices": ["snow", "wind", "ice", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209999.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 247284, "question_id": "JzfezD4ZrcyxUUZd8isxgy", "question": "What is likely happening today on Bowie St up ahead?", "choices": ["party", "traffic jam", "road work", "armed robbery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000247284.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104681, "question_id": "K2Y7JZvxVVoVgZyUgZ6o3k", "question": "What time of day would make it hardest to see this bird?", "choices": ["morning", "noon", "early evening", "night time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299925, "question_id": "K2aAHBkkPondB2KjceDPis", "question": "What is the biggest risk while surfing?", "choices": ["shark", "corals", "getting lost", "other surfer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22039, "question_id": "K4vr4GhqRF44GAYVYHLajK", "question": "How does this person feel?", "choices": ["hostile", "excited", "happy", "sad"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022039.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 305054, "question_id": "K5S2uFWsbZAznkaVDhJowY", "question": "What is packed in the suitcase on the right?", "choices": ["jeans", "food", "toys", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000305054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363518, "question_id": "K62Ns3o43YzBf3J5veqb2Q", "question": "What is the man wearing on his head?", "choices": ["backwards hat", "helmet", "swim cap", "beanie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363518.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516064, "question_id": "K7rWBAoBuykRqwGkxUEAvL", "question": "In which city is Michigan Avenue?", "choices": ["santa fe", "mexicali", "charleston", "chicago"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576382, "question_id": "K8nPPmgUDVnyTaXvC92LrJ", "question": "Why is the man extending his hands in front of him?", "choices": ["to tan", "to wave", "to swim", "to catch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576382.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436135, "question_id": "KACbkrF9ZY7zDEF8CqR3ok", "question": "What is the temperature here?", "choices": ["cool", "cold", "tepid", "hot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436135.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286394, "question_id": "KBgbZnxWnHFnEFnqYMfFso", "question": "Which age is a big fan of the pillowcase near the child's forehead?", "choices": ["teenager", "grandparent", "adult", "child"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92560, "question_id": "KD8UBwr3WAZBQ5LpKuYxDc", "question": "Which animal has a pattern most similar to this animal?", "choices": ["hippo", "panda bear", "cheetah", "alligator"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092560.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3434, "question_id": "KDRHHB3C5VvUBPbPPRAceV", "question": "What are these type of cats known as?", "choices": ["ragdoll", "labrador", "persian", "chimera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003434.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369711, "question_id": "KDU3QJGLPqhBXR5LSDybWr", "question": "What valuable material is shown here?", "choices": ["stone", "rock", "ivory", "diamond"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403566, "question_id": "KDd6MrBKXswkhpfDNtacbP", "question": "What is near the plane?", "choices": ["baby", "backpack", "man", "woman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122891, "question_id": "KDzk9iNdWv96pmFaWw4Ffp", "question": "What is the appliance all the way to the left?", "choices": ["dishwasher", "toaster", "refrigerator", "oven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122891.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343343, "question_id": "KEcsnub5PVBxpcjL2pYeDd", "question": "What kind of ceramic is shown in image?", "choices": ["stoneware", "none", "porcelain", "earthenware"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 244829, "question_id": "KHKKb9Wfqs7oUJAEKS8XPa", "question": "The initials on the window are the same as a company that sells what?", "choices": ["furniture", "sports", "insurance", "bank"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000244829.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54765, "question_id": "KHfu9BsRaFJzi9zQi6JCCN", "question": "Why is the buss on top of the wall?", "choices": ["is parked", "for sale", "is hiding", "is lost"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154550, "question_id": "KJaWKZfGJsWshCBWyizVqs", "question": "What type of transportation is this?", "choices": ["air", "land", "water", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465376, "question_id": "KJeZByKWTXqMwhcc7vh2sZ", "question": "The person in this vehicle is driving away from what?", "choices": ["bridge", "barn", "race track", "city"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465376.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 261820, "question_id": "KKKziceCGc5mSC2AUGzEBJ", "question": "What city is this sign in?", "choices": ["new york", "westminster", "paris", "syndey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000261820.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520498, "question_id": "KKZYcGQ3RnT8JUnUiaqc2u", "question": "What might this person be doing?", "choices": ["sleeping", "practicing", "eating", "stretching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291581, "question_id": "KL4pAN3kosyXJ2eGP8wmT4", "question": "When is the tennis match occurring?", "choices": ["morning", "afternoon", "midday", "night"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444333, "question_id": "KLgzJPqXCCC6t5ZUbzmTHL", "question": "What does the animal in the center have on its feet?", "choices": ["paws", "wings", "hooves", "talons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444333.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73475, "question_id": "KM32bcDTwY8zUVP24nFofn", "question": "What is the likely relationship between the animals?", "choices": ["parent-child", "strangers", "siblings", "mates"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37935, "question_id": "KMVTPU4sLXaUWwekbuLXNH", "question": "Why would you sit at this table?", "choices": ["to paint", "to eat", "to saw", "to work"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037935.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382647, "question_id": "KNUmD5a8aW2PK9coVbFaaG", "question": "What event is this person attending?", "choices": ["wedding", "baby shower", "concert", "birthday party"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382647.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499329, "question_id": "KNWeTErTcMTwP6tk6UrMYr", "question": "What is above the pants without touching them?", "choices": ["tie", "glasses", "baseball cap", "belt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417920, "question_id": "KNtTFwtsnuwdxLExa6KBPE", "question": "In what century was this sport invented in Hawaii?", "choices": ["6th", "4th", "12th", "16th"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000417920.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25629, "question_id": "KPFxpJZ7Vwqo2sKSbiXZhh", "question": "Where is this cat located?", "choices": ["bedroom", "bathroom", "kitchen", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025629.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384057, "question_id": "KQfkxK2j9C8uJ3u8a8kH64", "question": "What is next to the motorcycle?", "choices": ["street", "cow", "eagle", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329275, "question_id": "KR7UCLve2m5DYMvtrNjBCf", "question": "What do they need more of to improve the picture?", "choices": ["light", "furniture", "people", "cameras"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329275.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481637, "question_id": "KR8qEKW2oJcfZsM7Zr5MuV", "question": "Which horse is in the lead here?", "choices": ["light grey", "brown-and-white", "light brown", "dark brown"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481637.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292561, "question_id": "KSVdd9ScQPGAoomK2NGbYQ", "question": "What is the weather pattern?", "choices": ["snowy", "sunny", "rainy or", "cloudy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000292561.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386563, "question_id": "KSiKdup5CJjhvWdPCWy55L", "question": "What type of container is this object?", "choices": ["food", "floral", "paint", "beverage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386563.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197308, "question_id": "KTsWe76Rm2R4Phnmx69x8y", "question": "What are the colorful items used for?", "choices": ["lifting boxes", "cutting", "digging", "writing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369088, "question_id": "KUHtGHSigmkfJV6zFYhVw8", "question": "How old is the bird now?", "choices": ["five", "six", "eight", "seven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369088.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32340, "question_id": "KVeCGRooC8tWwejQZa2UgB", "question": "What is the most likely intention of the person with the blue umbrella?", "choices": ["jogging", "crossing street", "begging", "catching bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256947, "question_id": "KVr82gKbwrQQwscqHkaRaT", "question": "In which country do these persons think about taking a boat here?", "choices": ["spain", "england", "mexico", "ireland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000256947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422694, "question_id": "KWYti2pZUGk7fjkenvYLNn", "question": "What two colors are reflected by the water to make it look like this?", "choices": ["blue yellow", "green yellow", "black blue", "blue green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000422694.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8147, "question_id": "KWzCmjs5vvZFqWoQ3Wc6hf", "question": "What type of device is the man playing?", "choices": ["wii", "game cube", "x box", "gameboy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008147.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190701, "question_id": "KXvVsyAYHtT7DmwfuPTYb7", "question": "What might the bucketed person seen here be changing?", "choices": ["light bulb", "gas nozzle", "electrical wire", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190701.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374943, "question_id": "KZfaRzWocri2HLJARGfETC", "question": "What is the man looking at?", "choices": ["cat", "pictures", "television", "soda fizzing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397924, "question_id": "KaekbqHY7eSRJbCLoiXucG", "question": "What are they doing with their eyes?", "choices": ["washing", "sleeping", "crying", "looking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397924.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7514, "question_id": "KbFt6TTKG4vrNbVHfAnwdP", "question": "What will change color when it's time to go left?", "choices": ["sign", "words", "arrow", "pole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31703, "question_id": "KbZUdP8M2Zd29RrRUPptLz", "question": "What design is on the plate?", "choices": ["grape", "flower", "rainbow", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031703.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250508, "question_id": "KcHXTgk8nTpyHYDj7bTWA9", "question": "What type of animals are standing on the grass?", "choices": ["zebra", "camel", "horse", "cow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000250508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133127, "question_id": "KcaENngeyoMgL9pPjiVMzX", "question": "Which type animal shown here is more full of air and stuffing?", "choices": ["mouse", "elephant", "human", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81241, "question_id": "KdAMHibasRNyn5uVo2owLX", "question": "What type of fetish is being portrayed by the picture?", "choices": ["foot", "role play", "bondage", "humiliation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16498, "question_id": "KeVqoDPzM53aHUkMcGfuMu", "question": "What is the door on the left?", "choices": ["closed", "locked", "broken", "exit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000016498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472520, "question_id": "KfZKa2guh4hPe9P9KdLcHs", "question": "What is forbidden when near the red sign?", "choices": ["u turns", "parking", "loitering", "littering"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373724, "question_id": "KfxwNKcQQzduTb3YQdYQvj", "question": "In water here what should be avoided?", "choices": ["fishing", "shipping", "swimming", "boating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 116038, "question_id": "Kgq88eTemxbwDHtEWjNSug", "question": "Julian Serrano is famous for?", "choices": ["racer", "cycling", "cooking", "singing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000116038.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157349, "question_id": "KhnJNCNVazX5fCFhMUtggA", "question": "The stitching on its foot is meant to resemble what?", "choices": ["scratches", "nails", "toes", "shoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124053, "question_id": "KjYPCRVMw4RxoWLQJfUvmW", "question": "What sort of beach is represented in the picture?", "choices": ["private", "public", "restricted", "military only"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576651, "question_id": "KkgpLd4Rz9fgDYU52M3rRc", "question": "What shredded the report on the bed?", "choices": ["child", "fan", "dog", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576651.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317760, "question_id": "KkngGraFHxp3AVpDu7CXcf", "question": "What is the dogs head laying on?", "choices": ["couch", "bed", "baseball glove", "pillow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326375, "question_id": "KmL9wAnhea22hTRLqRvBqG", "question": "What is this dog ready to do?", "choices": ["drink", "attack", "rest", "hide"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295787, "question_id": "KmVA3KumQbsDyw3q4zPDft", "question": "What is this dog's owner holding here?", "choices": ["dog", "soda", "fish", "camera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000295787.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2991, "question_id": "KnM6Co4ap9TTdBQBodztb9", "question": "What is this brush used for?", "choices": ["teeth", "hair", "paint", "toilet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85521, "question_id": "KnVCgBv3oRXhbFENM3FJ8m", "question": "The photographer taking this picture is inside what?", "choices": ["house", "mountain", "tent", "dog house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302032, "question_id": "KokT6J5HDnKLSCQDYoNvUo", "question": "Which bird would be last to eat the food on the ground?", "choices": ["back one", "equal", "front right", "front left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302032.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402274, "question_id": "Kosx8uLgfCo96zT89V8wKK", "question": "What is on the building in the background?", "choices": ["flag", "clock", "poster", "monkey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402274.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 280483, "question_id": "Kq2YSMT8FZPTrmVyHUs4co", "question": "What type of power travels parallel to these tracks?", "choices": ["electric", "coal", "natural gas", "steam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000280483.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321775, "question_id": "Kqd6GxFR8AXbUnzcvASqyS", "question": "Why are they on the platform?", "choices": ["buying train", "awaiting train", "resting", "stealing train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321775.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457419, "question_id": "KtRcrKcogyaMXr6aTZV2bA", "question": "What is keeping these up in the air?", "choices": ["power", "land", "heat", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459010, "question_id": "KtdVqyGKVVumjccXmLizoX", "question": "What is the most foolish thing the human with the camera can do right now?", "choices": ["run", "feed bear", "stay still", "yell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000459010.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490423, "question_id": "KtpozaV5bJrTSLeYsfLwX3", "question": "The bus here is doing what now?", "choices": ["loading", "marketing", "stopping", "unloading"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 42051, "question_id": "Kw8ksEbnLNmxHaeAcNE5La", "question": "What is the person doing under the umbrella?", "choices": ["squatting", "running", "eating", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000042051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378084, "question_id": "KxS87x3cJyBJ5fmSJtTpUS", "question": "What action did the man just take with the disc?", "choices": ["drop", "catch", "throw", "kick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214993, "question_id": "KxdQQ5SkbMDYEo5r2pb5eV", "question": "What is this cat trying to do?", "choices": ["drink", "hide", "eat", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 502484, "question_id": "KxisTYRwZ8K4NTyoB8wtmo", "question": "What motivated this person to pose here?", "choices": ["surfing", "flash mob", "money", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000502484.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563908, "question_id": "Kysbo6rpn3YcnUxLNgwjPW", "question": "What type of services are held inside this structure?", "choices": ["church", "government", "jails", "police"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413437, "question_id": "KzCqcdbeJ7GpLGevrmbCtg", "question": "What is near the light?", "choices": ["chair", "cardboard box", "donkey", "barrel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413437.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 425952, "question_id": "L2mG5stxGyWWa7fygqtuwJ", "question": "The car is being used as what?", "choices": ["mouse", "refrigerator", "phone charger", "stun gun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000425952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431582, "question_id": "L4vBb5F78rc7pZgdHbkqRN", "question": "What is needed to make this object fly?", "choices": ["electricity", "remote", "wind", "battery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387046, "question_id": "L4xr3zPxwPBiLB3nNYiwF3", "question": "How many people are touching surfboards?", "choices": ["three", "four", "six", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424280, "question_id": "L5U7qhqx5JBwHQJ2uwVwR2", "question": "What material are the utility poles made out of?", "choices": ["steel", "concrete", "aluminum", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424280.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209781, "question_id": "L5YXVDRnDSwmx8ViXRaaER", "question": "The woman with the scissors is giving herself what type of haircut?", "choices": ["undercut", "bangs", "pixie cut", "bob"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65423, "question_id": "L642JbrtaRrb6aui3hiLLA", "question": "If the man is pushed from behind where would he land?", "choices": ["brick road", "fireplace", "pool", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123826, "question_id": "L68yix5ASZeTdJDiAfVzbW", "question": "What will he most likely do next?", "choices": ["flip board", "fall down", "stop skating", "go down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123826.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 165923, "question_id": "L7NH6RTVWRPkWi8oExXTYd", "question": "What is a word that is commonly used when referring to this type of sport?", "choices": ["mountain", "court", "field", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000165923.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75781, "question_id": "L7TDgK59QSjY9yXiA3Yfcr", "question": "What safety equipment is most important if using the item on his tie?", "choices": ["oxygen tank", "helmet", "bungee cable", "parachute"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508784, "question_id": "L7r9TJnLyMerQPPXAYBjDP", "question": "What continent can this animal originate from?", "choices": ["asia", "north america", "europe", "south america"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539316, "question_id": "L8S3FDtcSaYkaW7Btr5425", "question": "Is this belongs to naval air force or air crop?", "choices": ["false", "yes", "maybe", "no"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539316.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221849, "question_id": "L8bg2rhoGH4n62NSn4YGac", "question": "What action is this person taking?", "choices": ["descend", "flip", "roll", "ascend"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221849.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435900, "question_id": "L8cYNpQ7u6MrD3W5hoRLgj", "question": "What kind of material it is?", "choices": ["ceramic", "fiber", "wood", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435900.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81423, "question_id": "LAKN42Pg7pB3vn3NyEiiGu", "question": "What type of animal is hiding among the stuffed animals?", "choices": ["guinea pig", "dog", "cat", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245155, "question_id": "LAZwQmCQGzfnoEhgRoYB2b", "question": "What is the gray stuff on the animal?", "choices": ["dirt", "acorns", "gills", "fur"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000245155.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145334, "question_id": "LB8eSnP5tcARTZHmBcD5bE", "question": "On what type of surface is this bear sitting?", "choices": ["ground", "chair", "log", "stool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145334.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527798, "question_id": "LBx8t47FDjALNGGHCh7976", "question": "What country was this food first cultivated in?", "choices": ["italy", "peru", "vietnam", "nigeria"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188665, "question_id": "LCrXdPMosALru2mV4tUgJu", "question": "What is the gray carpet pad behind the cat used for?", "choices": ["foot rest", "home decor", "scratching toy", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188665.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516400, "question_id": "LCyJBzohgZ4T3cwLGtfGaX", "question": "What do the white switches on the wall control?", "choices": ["blinds", "alarm", "lights", "temperature"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516400.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183361, "question_id": "LD7Le56HJwNXMPeXkQN9e6", "question": "How would the vehicle be described?", "choices": ["brand new", "tank", "racecar", "beat up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183361.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159565, "question_id": "LEEEhHXmmgZmgUJeJJcaci", "question": "How many people are surfing?", "choices": ["four", "two", "one", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 181748, "question_id": "LESyxGr8gqzceJnwh3zeEx", "question": "What is the person in the air above the red plane doing?", "choices": ["hang gliding", "sky diving", "wind sailing", "kite surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000181748.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549811, "question_id": "LEyqwxvPJGjdp3AVBBk8rE", "question": "Where is the kite being flown?", "choices": ["playground", "field", "beach", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549811.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2375, "question_id": "LGCycqmkKzq6TCUeTA4UcU", "question": "What is attached to her ankle?", "choices": ["dog", "child", "board", "bracelet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276249, "question_id": "LGmYJb7eFUNauN4eNgUznL", "question": "What are they looking at?", "choices": ["each other", "trees", "photographer", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202625, "question_id": "LK2PLRDGJmxv5VeSKXVCqz", "question": "Where is this photo taken from?", "choices": ["floor", "window", "chair", "ceiling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202625.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458063, "question_id": "LKzwcZo7PgHgCymTQDFvFA", "question": "What is the item that is on top of the pink box?", "choices": ["pitchfork", "ray gun", "mouse", "remote"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414256, "question_id": "LLjfYgy5qFVgCS8e4vhtKe", "question": "Why is the bike in the middle of a snowy field?", "choices": ["keep cold", "keep safe", "for camera", "for sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414256.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189624, "question_id": "LMBcMVAcorHevE4Js8sJyy", "question": "Why would someone sit at this table?", "choices": ["to eat", "to work", "to saw", "to sew"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189624.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 33219, "question_id": "LMvFYBJqcQKgP6cdqBxLto", "question": "What is above the crossed items on the girl's sleeve?", "choices": ["nail", "cat", "skull", "pumpkin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000033219.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301336, "question_id": "LMyS5UTyRJC4XgWZV2jbMG", "question": "What language is this person studying?", "choices": ["russian", "chinese", "japanese", "french"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301336.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407872, "question_id": "LPCmcn9ihfcVSYWPCDcULX", "question": "What feature makes these animals different from each other?", "choices": ["eyes", "stripes", "mane", "ears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000407872.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326942, "question_id": "LPMXFmsHhBMaBheQtBjc7h", "question": "What is the air temperature surrounding this intersection?", "choices": ["warm", "hot", "cold", "mild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326942.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328523, "question_id": "LPmWaddrVtLR77z9Ptmoyf", "question": "What part of this food is usually eaten last?", "choices": ["cheese", "sauce", "crust", "plate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328523.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264008, "question_id": "LQpvshBbyBmcNEZYCMgPh5", "question": "How many toilets are in the image?", "choices": ["two", "four", "one", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000264008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8802, "question_id": "LRnvdbkPDqA5seRhhtiaov", "question": "What type of socks are being worn by the skateboarder?", "choices": ["crew socks", "ankle socks", "mid-calf socks", "knee socks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167552, "question_id": "LSHND6psphtAnxLbNR2kjr", "question": "What is the likely date?", "choices": ["february 14", "december 25", "march 17", "october 31"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000167552.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443151, "question_id": "LTACwJFqgCsJbC82hC6ffA", "question": "What type of diets do these creatures have?", "choices": ["vegetarian", "carnivore", "omnivore", "fish eaters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218267, "question_id": "LVTyhZijqe5pG2FtDyE4HG", "question": "What type of transportation are these seats for?", "choices": ["air", "land", "water", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262032, "question_id": "LVecqo78JJ8U5QNmtnDw9G", "question": "What is this surface used for?", "choices": ["cooking", "writing", "painting", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262032.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520675, "question_id": "LVt5jtWbfciwzDBnhd8G9k", "question": "What part of the skateboard is missing that would be on a modern skateboard?", "choices": ["trucks", "wheels", "bearings", "grip tape"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520675.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456400, "question_id": "LVtf8pw964VZYF66TndgoQ", "question": "What is most offensive about the person seen here?", "choices": ["pants", "black makeup", "hat", "shirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456400.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151429, "question_id": "LWdAvL2KAHS42wpcVurh2N", "question": "What is the approximate top speed of these kinds of animals?", "choices": ["60 mph", "25 mph", "10 mph", "40 mph"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563139, "question_id": "LXsCPrZUuPYmWZHYny6Car", "question": "What do people usually do in this room?", "choices": ["eat", "study", "wash up", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563139.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343472, "question_id": "LXwMGVgZg7L6EPrHtbmdye", "question": "Which one of these items might be traveling along with the bear?", "choices": ["televisions", "books", "flowers", "candles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343472.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7657, "question_id": "LZYSHSK9QoZEGTqTVqS66W", "question": "What makes this wave possible?", "choices": ["wind", "gas", "humans", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183818, "question_id": "LbWMzP2n4czwQFk386UmgU", "question": "What do these objects need to work?", "choices": ["wind", "electricity", "sunlight", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183818.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 300101, "question_id": "LbiKwjt8AbjQGE6MpURS3K", "question": "Which clothing item normally used in tennis players does this player lack?", "choices": ["underwear", "pants", "shoes", "socks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000300101.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546250, "question_id": "Lcm6hAdtciBWz6tw5zcKe4", "question": "What is the name given to this types of dog?", "choices": ["poodle", "german shepherd", "sheep dog", "bull dog"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000546250.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318399, "question_id": "LdammSRFUnKunXwne7UsjW", "question": "What would you order at a donut shop to get this one?", "choices": ["glazed", "jelly", "chocolate", "powdered"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318399.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315931, "question_id": "LdhU3mkpMxoMEio8u3DQpN", "question": "What kind of material is the person skateboarding on?", "choices": ["stone", "steel", "wood", "concrete"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315931.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37000, "question_id": "Le58qeaQx3BsYz6aGYjtbw", "question": "Why would someone come to this location?", "choices": ["shop", "exercise", "travel", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037000.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129549, "question_id": "Le9USNtMaW6k347zrNdeLF", "question": "In which country is the stop sign located?", "choices": ["united kingdom", "canada", "australia", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129549.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465241, "question_id": "LeCU4hzmmWAsXC3hjUxajP", "question": "Where is the racquet located?", "choices": ["table", "rack", "right hand", "wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339960, "question_id": "LeJtPcUypL7ZXnuvFkJCbu", "question": "What is the person wearing?", "choices": ["bathing suit", "wet suit", "bikini", "dress"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339960.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354065, "question_id": "LefZEAHStDxDjwRnS3UMdd", "question": "What does this space look more like than a bathroom?", "choices": ["living room", "kitchen", "walk-in closet", "garage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260240, "question_id": "LezdeipGTbdpyKwEb84PGS", "question": "This car company is a division of what larger company?", "choices": ["kia", "pontiac", "general motors", "peugeot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260240.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520575, "question_id": "LfcimQhK6DkmpfH93fBqjt", "question": "Which area of this bus would passengers get on the bus from?", "choices": ["roof", "front left", "front right", "back left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520575.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270735, "question_id": "LgsBvsm8pqmUMJ3Zkgxcn9", "question": "What would one normally expect of the figurines?", "choices": ["bigger", "laying down", "softer", "wearing clothes"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000270735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436055, "question_id": "LgweT7nNukGwXLkWNVbBZP", "question": "How does the man pictured here appear?", "choices": ["ecstatic", "desolate", "angry", "serious"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436055.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508156, "question_id": "LhCxZkaKQh3y3V2Db2oeiw", "question": "What was most likely used to cut the pony tail off?", "choices": ["buzzers", "comb", "clippers", "scissors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508156.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491864, "question_id": "LhQxJ8hMs6LAwMygRFLX5j", "question": "What is the best term for the dog's state of mind at the moment?", "choices": ["boredom", "joy", "confusion", "fear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000491864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442709, "question_id": "LhrFmUM5ZrXivfJqmibbvk", "question": "Who was President of the United States in the year listed on the clock?", "choices": ["van buren", "polk", "garfield", "buchanan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460142, "question_id": "LjRhYYz4t6DzpxJm9bSCfd", "question": "What part of the zebra's body would likely get hurt by the bird first?", "choices": ["nose", "back", "stomach", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460142.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146906, "question_id": "LkQnygsyZbbCNzHumccGPG", "question": "How many clearly visible giraffes are in the image?", "choices": ["five", "one", "two", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516057, "question_id": "LmTnpCYmcM6FsnseqtRgNT", "question": "The decal on the window is meant to do what?", "choices": ["remind owner", "inform police", "identify owner", "dissuade thieves"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000516057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9455, "question_id": "LnBhKnuEG5NkkCRTH77j6V", "question": "If moving what part of the truck here must be open in order to most fully load it?", "choices": ["window", "driver's door", "passenger door", "rear door"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009455.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 241298, "question_id": "LoUVvDhrZUcgHpyJEN8vLw", "question": "The paper lining resembles the board of which game?", "choices": ["parcheesi", "checkers", "monopoly", "sorry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000241298.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560101, "question_id": "LofLPxCELworjwZ8dDMwNF", "question": "What's the man in the airs hairstyle usually called?", "choices": ["mohawk", "dreadlocks", "pompadour", "windswept"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560101.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53701, "question_id": "LqoxmmSK97ACWruYLSrb7f", "question": "What sort of clock face is in front of the building?", "choices": ["minimalistic", "roman numeral", "western", "standard imperial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053701.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 150590, "question_id": "LqxQY7Gy3vbDJftvp7gLoS", "question": "What is usually found in this setting?", "choices": ["fish", "egg", "lemur", "car"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000150590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160263, "question_id": "LrhuGm43sQUq2La7PRL4BZ", "question": "What has someone done to the object the snowboarder is grinding?", "choices": ["bagged it", "wagged it", "tagged it", "snagged it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211790, "question_id": "LryngvHy8YE6JWrooXTfR4", "question": "What is the brand of the phone in the image?", "choices": ["nokia", "motorola", "apple", "samsung"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476391, "question_id": "LsDfeQVoj3f4qTarJb6noq", "question": "What is this person trying to do?", "choices": ["eat", "rest", "clean", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476391.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37876, "question_id": "LsnXHnkL6JFoebXEgjM6ye", "question": "What is the name of the process that causes the discoloration on these objects?", "choices": ["oxidation", "evaporation", "pasteurization", "condensation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037876.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572741, "question_id": "LtLs5RtKswgwm8TPV58Mrh", "question": "A famous fictional example of these animals is Black?", "choices": ["beauty", "elegance", "glamour", "radiance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572741.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111061, "question_id": "LtMMyuu7Qvxr73wwLFDSWt", "question": "What kind of person does this look like?", "choices": ["woman", "old woman", "old man", "young man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111061.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180155, "question_id": "Lu2zyRbeWgP6KsL4AUJEU9", "question": "What is the man doing to the horse?", "choices": ["painting it", "cutting it", "brushing it", "feeding it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180155.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142296, "question_id": "LwGj5BDJs3aWdUpHga3n7r", "question": "What type of area is shown?", "choices": ["urban", "forest", "desert", "rural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142296.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391768, "question_id": "LyDBbH2t6jWdFL2b8NkRrX", "question": "Where would the large item in the foreground usually be found?", "choices": ["car dealership", "dumpster", "gallery", "circus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53128, "question_id": "LyFjnV4sR6gy6xNxgpvWcr", "question": "What temperature can retain the surface most preferred here longest?", "choices": ["freezing", "tropical", "room", "hot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427806, "question_id": "LyMYVzq5prs2fCxo5mnBvu", "question": "Which door would someone open if they desired something to help to chill Tea?", "choices": ["bottom", "top", "back", "left side"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90923, "question_id": "LyTiAdJn4CZWD76UfiBknS", "question": "The animal here is likely to go in which direction now?", "choices": ["behind", "none", "front", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090923.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152730, "question_id": "LyZESUgCWaPLDQG6WrEao5", "question": "How many rowers are in the boat?", "choices": ["four", "three", "one", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152730.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122891, "question_id": "LzFcJDR23PrqZpWnwbutV9", "question": "How is the contraption in the middle of the room called?", "choices": ["table", "kitchen island", "bar", "stove"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122891.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155837, "question_id": "Lzc9xVEbWCHYcHNRMcDpjb", "question": "What is the giraffe doing on the sand here?", "choices": ["playing", "eating", "stalking", "walking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92454, "question_id": "LznYvAjBms5zu67QG4s6uG", "question": "What kind of road is Bent?", "choices": ["street", "boulevard", "avenue", "highway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092454.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 128933, "question_id": "M3GfRWUTSE6rQg2UpqMuzW", "question": "Why would someone sit at this table?", "choices": ["to work", "to sew", "to eat", "to saw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000128933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298858, "question_id": "M5HMnmWXZjnDmz5rfAKUbG", "question": "Who would approve of the added graffiti message?", "choices": ["dietitian", "chef", "baker", "butcher"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000298858.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281254, "question_id": "M6YHxrT3VDvuVRpsxmjyWi", "question": "Which car will begin to move first once the train passes?", "choices": ["silver car", "both cars", "neither car", "red car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281254.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338981, "question_id": "M6noRDyHN4zymQMUyMkgud", "question": "What should the dog do with the blue object?", "choices": ["wear", "play", "tear apart", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000338981.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92745, "question_id": "M6ueW4EzdYgtyr74vw7ENa", "question": "What are these containers useful for?", "choices": ["travel", "fishing pole", "food", "art supplies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147332, "question_id": "M7BQT3q695PHkrrcsnxkkF", "question": "What material is the door frame behind the man made of?", "choices": ["marble", "wood", "steel", "clay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147332.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330158, "question_id": "M7Cr4p3tcFgJWyTojZnf7N", "question": "What is flying in the air?", "choices": ["dragon", "wind chime", "kite", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330158.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463181, "question_id": "M7oXtw5QyetQMjtFjeWph6", "question": "Why does he have his arms up?", "choices": ["lift", "measure", "wave", "reach"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000463181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112142, "question_id": "M7w6hxZQMJAEScz6kQNb2P", "question": "To type in all caps what key would one need to press to make this easily possible?", "choices": ["f1", "f2", "alt", "caps lock"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112142.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465912, "question_id": "M8Bk85ATuFurNgUubBkniz", "question": "Why are the cat's eyes green?", "choices": ["birth defect", "electronic cat", "it's focusing", "camera flash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465912.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31492, "question_id": "MA2NAmaBjx9bVooQdVeuLC", "question": "What is near the bicycle?", "choices": ["garden gnome", "egg", "cat", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031492.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 233753, "question_id": "MAxxk6z5xZLykDMEMX8JZ9", "question": "What information does this fixture provide?", "choices": ["time", "warning", "temperature", "directional"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000233753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190548, "question_id": "MBC4RKRQ9ftehnhcEA5CZC", "question": "What makes those chairs easy to carry?", "choices": ["handle", "foldable", "not slippery", "cotton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190548.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59940, "question_id": "MBS2fCLrFb8e2fvqUsXVgP", "question": "What animal is in front of the zebra?", "choices": ["horse", "camel", "frog", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059940.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114008, "question_id": "MBXdW7BnDSDRwAGQRxWwsU", "question": "What establishment is this?", "choices": ["zoo", "park", "animal farm", "wilderness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 538297, "question_id": "MBe3WRYVGFRMPGsqJGKgKn", "question": "Why would someone sit at this table?", "choices": ["to work", "to saw", "to sew", "to eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000538297.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268009, "question_id": "MCCnTyCe8bAmn3kicGQFFm", "question": "What are the sharp objects on the bears feet called?", "choices": ["spikes", "talons", "pikes", "claws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000268009.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145386, "question_id": "MCfyHfvcficQsiwSExqrNP", "question": "What is the handled item shown here meant to clean?", "choices": ["sinks", "hair", "phones", "toilet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145386.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265992, "question_id": "MDEzeBDmuyebJCB4jNj9WM", "question": "What kind of erosion happened in this image?", "choices": ["oxidation", "hydration", "carbonation", "chemical"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265992.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575653, "question_id": "MDQuh7BiTPMQpodWizLza2", "question": "What the cat most likely try to push over?", "choices": ["dvd player", "dresser", "hat", "tv"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561893, "question_id": "MDjfDh66TJBzxoMWjVVzBJ", "question": "Worshippers at this church likely believe in what Messiah?", "choices": ["jesus christ", "sabbatai zevi", "rael", "haile selassie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561893.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476166, "question_id": "ME2LLrg7Dyq85VmkwbiDen", "question": "What word refers to this animal?", "choices": ["pachyderm", "reptile", "bird", "amphibian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476166.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74983, "question_id": "MEU5CsATRyuNULLFzghqGw", "question": "What animal type is also often used to describe this type of car?", "choices": ["beetle", "grasshopper", "horse", "lady bug"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074983.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293093, "question_id": "MEyvBUK3E7eo54dhU9d4Jb", "question": "What term would be appropriate to call the man?", "choices": ["quarterback", "slugger", "pitcher", "safety"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293093.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389748, "question_id": "MGJUC9pESwRpFpwiTBe4rz", "question": "What drink are you most likely to grab from the top of the cart?", "choices": ["beer", "water", "soda", "snapple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389748.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82064, "question_id": "MGYdPMTMmdL4yexUaZyvmn", "question": "The cat here is interested in stalking what?", "choices": ["grass", "birds", "image", "bird seed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352326, "question_id": "MGjuK44aenTuXFRtHGqyET", "question": "What can this appliance be used for?", "choices": ["watching", "washing", "cooling", "cooking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352326.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355390, "question_id": "MGxcWZqSbCR7NTiiRXLEkL", "question": "What is the gate made from?", "choices": ["wood", "iron", "steel", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157900, "question_id": "MHB5sabMqwbPpRVXrKAURx", "question": "How is the cat able to fit in the bowl?", "choices": ["missing limbs", "broken bones", "dead", "flexible bones"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157900.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357766, "question_id": "MJ43xT8Ekno2yooVqAHCUv", "question": "The fork to the right of the pizza is made from what material?", "choices": ["plastic", "ceramic", "metal", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343352, "question_id": "MK8ZKEvXL7PT6xEsChnkp2", "question": "What kind of game is being played?", "choices": ["olympic", "video", "card", "board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343352.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565425, "question_id": "MKNG6cofMpwQVVkCT7VppT", "question": "How are these objects controlled?", "choices": ["string", "computer", "magic", "remote"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168283, "question_id": "MKbnpJcubBhx2edv5KdHWB", "question": "What does the blue stick behind the cat help women control?", "choices": ["body odor", "hair", "body temperature", "makeup application"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186959, "question_id": "MKgiR39cw37jftqAnrBcnF", "question": "What weather event is this area in particular experiencing?", "choices": ["tornados", "rain", "snow", "hail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186959.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412502, "question_id": "MKotUhyHALLKjuUpF45eeq", "question": "The cat is standing up in order to see something on the what?", "choices": ["ground", "house", "fence", "other cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412502.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 242627, "question_id": "ML6DgTsBNrhdccfQ6N3eNu", "question": "What is the sound that is produced by the animal pictured above?", "choices": ["purr", "roar", "bark", "neighs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000242627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 375587, "question_id": "MLb6zazDkcMY2d94FkSqiB", "question": "What is the purpose of the yellow container?", "choices": ["store snacks", "mail", "discarding trash", "messages"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000375587.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412329, "question_id": "MLcAx8UhzMpYFLMMLgrK2G", "question": "What do the bowls prevent from getting on the surface?", "choices": ["wax", "soot", "water", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549, "question_id": "MMHhQPA9Xwu8KiQaSPQuZS", "question": "What could easily be done to make the houses look less uniform?", "choices": ["paint", "rebuild", "relocate", "demolish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000549.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334347, "question_id": "MMyriarGbX3KA3ZVbv6VP7", "question": "What does the lamppost resemble?", "choices": ["shepherd's crook", "suv", "tire", "discus"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000334347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243893, "question_id": "MN2H8hZRzKhvUX5h7TUa35", "question": "Which of these dangers could the elephants face based on their location?", "choices": ["boats", "sharks", "vehicles", "lava"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000243893.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186416, "question_id": "MPS2xYGygNkJ3LY8NFUXrm", "question": "What is on top of the bun?", "choices": ["mold", "sprinkles", "cherries", "sesame seeds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186416.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281326, "question_id": "MQ2E7yyTRzdntbw79Jsq2A", "question": "What is causing a symmetrical reflection down the center of the building?", "choices": ["vortex", "mirror", "painting", "lens"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281326.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16923, "question_id": "MQ9CFcQvagqiYzWdbwQBeT", "question": "What will he do with the ball?", "choices": ["steal it", "sell it", "hide it", "hit it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000016923.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96786, "question_id": "MQWrCZgtyKycExo2VVmvCc", "question": "There shirts are possibly made from which one of these materials?", "choices": ["satin", "cotton", "leather", "silk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096786.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382931, "question_id": "MQnnE42RchM8YPZZrmbsvZ", "question": "What did the man stick to his hat?", "choices": ["buttons", "clips", "pins", "stickers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382931.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368804, "question_id": "MRbKcmAgcJzXQJjGDBRL7B", "question": "What type of location is this dog located in?", "choices": ["barn", "home", "circus", "vet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368804.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 305293, "question_id": "MRmrcrmDx2eEX8gjVjbbfD", "question": "How many bicycles do you see?", "choices": ["none", "two", "one", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000305293.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186341, "question_id": "MS4LFJ2Ve27UkoczYVg59u", "question": "What is this person standing amid?", "choices": ["pizzeria", "dairy", "donut shop", "coke factory"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186341.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474855, "question_id": "MSmhD33pBoiv4mokQPu24X", "question": "To which direction is the giraffe facing?", "choices": ["east", "west", "down", "up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474855.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344136, "question_id": "MTZFvd6bZzfZ56ZkeMvoxw", "question": "What is causing the shadows?", "choices": ["moon", "candles", "lamps", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344136.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268524, "question_id": "MTgC3CpDTeoW3D6qEtrKQd", "question": "What is the company whose name is at the bottom of the phone known for?", "choices": ["bell atlantic", "desktops", "laptops", "fios"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000268524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 140494, "question_id": "MV3virHkqKVzjoHjk9Piq4", "question": "What type of location is shown?", "choices": ["arctic", "jungle", "commercial", "coastal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000140494.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99834, "question_id": "MVteLfzS9LRnmBKpad9p4K", "question": "Which item made the cut stay straight?", "choices": ["silver", "tan", "gray handle", "brown handle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099834.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104942, "question_id": "MWoTxSmLj2wRWHx8USWDxc", "question": "How is this apple being served?", "choices": ["juiced", "frozen", "peeled", "sliced"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104942.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446153, "question_id": "MXVfdr6MqDDAGT5CKdiPtm", "question": "What is in the glass on the left?", "choices": ["liquid", "sand", "foam", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446153.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188186, "question_id": "MZicxkPqKxTopJLEfriZ4C", "question": "What is required for this activity?", "choices": ["water", "sun", "wind", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188186.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333849, "question_id": "MZxXPqXF8vjzi4CbaStqUV", "question": "If a gentleman urinates in here what will he do to the seat?", "choices": ["shut it", "raise it", "kneel", "turn around"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333849.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 280578, "question_id": "Ma7wD9f728ejDpjd2fkLBe", "question": "What is needed for this activity?", "choices": ["water", "sun", "wind", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000280578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32216, "question_id": "MaZXmHHPETXpCkeUp2qshP", "question": "What is hanging off the rear of the zebras?", "choices": ["arm", "ears", "tail", "wings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032216.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316176, "question_id": "MajBzbne89mJc9G7sq7chJ", "question": "What does the elephant seem to be doing in the area?", "choices": ["bathing", "fighting", "racing", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316176.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190287, "question_id": "MbCKmL5Ha79NnSwvkiks9u", "question": "In which country is this street corner located?", "choices": ["canada", "england", "france", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190287.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6045, "question_id": "MbWSuSBLtYogfWJyva4nKs", "question": "What company do these buses belong to?", "choices": ["volvo", "ibm", "gm", "ford"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006045.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294155, "question_id": "MbnbhU24NeSg3fCLpYYyeq", "question": "What does the person have?", "choices": ["sword", "umbrella", "purse", "club"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294155.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320327, "question_id": "Mc4e6qXiYXTLDJgPWRcthy", "question": "What is the main food item that these animals eat?", "choices": ["berries", "seaweed", "fish", "seals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320327.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329996, "question_id": "McQsAdU9Y5s9MDZH2fhzo6", "question": "In which one of these areas can you travel via one of these vehicles?", "choices": ["canada", "guam", "uk", "virgin islands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153802, "question_id": "McSTEyCMy3iJG398pyrYJX", "question": "What is on the shelf?", "choices": ["knives", "video games", "toys", "books"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474048, "question_id": "McgZ2cvL3jfZ8EFF8xLCAU", "question": "What might these animals be hunted for?", "choices": ["ivory", "skin", "meat", "fur"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474048.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578552, "question_id": "MgmYeX4tax2M7vCv5cqoAR", "question": "What is the yellow structure likely to be?", "choices": ["mailbox", "fire hydrant", "traffic sign", "toy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578552.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400424, "question_id": "MhFuHDfoMdhmt2wStuEomn", "question": "What kind of object is closest to the ceiling?", "choices": ["pipes", "pole", "drain", "sign"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400424.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319589, "question_id": "MiuYNjeZHBevzEBBSDp3gz", "question": "What animal usually lives in the same place as this animal?", "choices": ["cat", "goat", "pig", "llama"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319589.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73625, "question_id": "MjDxuyjXaZr4ycAU5hHKHo", "question": "The woman in green is most likely using what?", "choices": ["basket", "phone", "computer", "bucket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073625.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 331130, "question_id": "Mk6WyJDiXM7f4WmeQharV8", "question": "What type of writing is the above pickup painted?", "choices": ["normal", "none", "calligraphy", "italics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000331130.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463295, "question_id": "MmNwqEJd8ZLyvodqer4YBX", "question": "What is the red item in the foreground used for?", "choices": ["circus shows", "fires", "cooking", "reading books"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463295.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569894, "question_id": "MmkTjkokLm9zzPSjMMs4Dq", "question": "This animal is called what when it is small?", "choices": ["cub", "puppy", "foal", "kitten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164825, "question_id": "Mn9Z2p2bN9HnxCjH64Kzz6", "question": "How are these animals enclosed?", "choices": ["barn", "pen", "gate", "fence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164825.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494353, "question_id": "MnVVqPzVhG3fYiFaLitTZJ", "question": "What are the zebras doing?", "choices": ["hiding", "grazing", "mating", "fighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154474, "question_id": "Mnw6miQLbPiUT9nVQQtejc", "question": "How is this food portioned for serving?", "choices": ["sliced", "scooped", "shredded", "cubed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351082, "question_id": "MoVAMmeyzWig2nAgDhEMap", "question": "To what type entity does Hammertime refer?", "choices": ["speeder", "magician", "alien", "musician"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351082.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468641, "question_id": "MoutRD2U46mj6Bdi3mEnDg", "question": "Why is the donut in her mouth?", "choices": ["hiding it", "hands full", "eating it", "confused"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000468641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516331, "question_id": "MpVfeAdbiKEZrn3L62BjkZ", "question": "What is near the green sign?", "choices": ["branch", "feather", "arrow", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516331.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93042, "question_id": "MpjatoayVgFHw43gLJhen8", "question": "Which color is least scattered by fog or dust particles?", "choices": ["black", "yellow", "white", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093042.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390063, "question_id": "MqNfPdv6EbQXBBTC2ELjUe", "question": "Why is the man wearing glasses?", "choices": ["vision", "sun", "wind", "protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574230, "question_id": "MqsDSGiWtJaCZVdgwagZ4L", "question": "Why is the woman wearing boots?", "choices": ["rain", "visibility", "speed", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000574230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4996, "question_id": "MrFh88aoLn2oLSZf9AdiyT", "question": "What is the elephant doing with his trunk?", "choices": ["sneezing", "attacking", "throwing dirt", "prodding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77162, "question_id": "MrK2tDUrKWJtMifGbbPZ7e", "question": "Which surfer wearing which color appears to be going faster down the wave?", "choices": ["blue", "white", "green", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349680, "question_id": "MsGiwUfSw4Qo5bmSfF2ZYo", "question": "The name underneath the skull is mentioned in a Biblical book named after what prophet?", "choices": ["daniel", "samuel", "elijah", "moses"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349680.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471919, "question_id": "MsQkWfXHAiJqSyhhESAvpJ", "question": "What can be put in the water to prolong freshness?", "choices": ["plant food", "salt", "fertilizer", "pesticide"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000471919.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543415, "question_id": "MsdAQjkSWYNYMz5skR7nxU", "question": "What did the owner of this item used to do?", "choices": ["dance", "sink", "call sos", "watch tv"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309910, "question_id": "MtBiJ9WXJyLtf4RzkCraYd", "question": "What is next to the chair?", "choices": ["dog", "cat", "pizza slice", "bacon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309910.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293021, "question_id": "MvS4xQoH9u8HYvHukYwsNu", "question": "What is this vehicle used for?", "choices": ["hauling", "commuting", "racing", "carpooling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293021.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342419, "question_id": "MvaKUjdKfHuRdU5eBP4fKM", "question": "What object hangs from the yellow line next to the yellow stick?", "choices": ["hoop", "carabiner", "clip", "loop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558085, "question_id": "MyEAT53h3X7eRRNR5RkePh", "question": "Why is he crouching like that?", "choices": ["showing off", "hiding", "balancing", "falling"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000558085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 181535, "question_id": "MysuYpmyVvkngt4ockjy3T", "question": "Why does this person have their head covered?", "choices": ["fashion", "protection", "cleanliness", "religion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000181535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80657, "question_id": "MzHGLD6yaqPXAazLmk4nG3", "question": "What function does the rack to the right of the folded towels and in back of the tub perform besides holding towels?", "choices": ["drying rack", "dehumidifier", "laundry hanger", "room radiator"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000080657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463911, "question_id": "MzRU32WGJnsseSKPKj3tZP", "question": "What kind of food group is sold at the store?", "choices": ["dairy", "veggies", "meat", "fruits"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463911.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40261, "question_id": "MzSo7ydYVLSLMEf3pVRN3h", "question": "What level difficulty is this trail?", "choices": ["impossible", "expert", "beginner", "advanced"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 101345, "question_id": "MzidX4xMm34iuqUr4wcfmo", "question": "What performer had a saying that can be found on the sign under the large letters?", "choices": ["snow", "falco", "vanilla ice", "mc hammer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000101345.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200780, "question_id": "MzvXpYdJkp2942W9WX3Z74", "question": "What is this cup holder themed after?", "choices": ["type writer", "keyboard", "computer", "cell phone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200780.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201070, "question_id": "N27sLBUPV6BC5M9QdrfTdw", "question": "Why has the man covered his head?", "choices": ["warmth", "protection", "religion", "fashion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186664, "question_id": "N2VyM8CzTVqudduAQf2CpB", "question": "What does the black item in front of the bear help with?", "choices": ["visibility", "speed", "traction", "brakes"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000186664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112291, "question_id": "N3QtiRnfzFYCoMuZvDbJKJ", "question": "Where do people normally engage in this behavior?", "choices": ["suburban town", "rural village", "big city", "small town"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000112291.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505154, "question_id": "N44nrGkoT6SSq2HbEZArme", "question": "What is needed for this activity?", "choices": ["snow", "rain", "water", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196997, "question_id": "N67zXRvbzcLM2z5ccfpgxs", "question": "How big are these animals relative to humans?", "choices": ["very large", "slightly larger", "same size", "small"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196997.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74580, "question_id": "N8vvrbtYhB8TLAiiGqtyAY", "question": "What is this group of zebras called?", "choices": ["congress", "pride", "dazzle", "tower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71570, "question_id": "N9AL47EQQrBmoPqTducerP", "question": "Which position is the cow in?", "choices": ["defense", "feeding", "resting", "ramming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401752, "question_id": "NB5kvyM2NHJyztKj22aTF8", "question": "What can be seen traveling immediately above this location?", "choices": ["submarines", "vehicles", "boats", "airplanes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401752.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337746, "question_id": "NBQrSQrvUCaSHxfhktK7rG", "question": "The back half of the table where the animals are sitting is made from what material?", "choices": ["steel", "plastic", "aluminum", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337746.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283460, "question_id": "NBrnfdf5h8yHzJxXHc4LUj", "question": "To which country does the above airplane belong to?", "choices": ["us", "poland", "netherlands", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452364, "question_id": "NCa8UCspAfotJHynH4Jz4D", "question": "What does the dog want with the pizza?", "choices": ["sell it", "eat it", "bury it", "sniff it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259916, "question_id": "NDoR9GgpG5DhWW4GAqj2Em", "question": "What type of bus is typically equipped with this type of sign?", "choices": ["no bus", "private bus", "school bus", "metro bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259916.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500551, "question_id": "NGoJfygeBzbht3XYBLJHpM", "question": "What action will she take?", "choices": ["sprint", "dribble", "swing", "dunk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209743, "question_id": "NHBTd7eYSkDs8CCjdDQATv", "question": "Why do kites often have long tails?", "choices": ["efficiency", "style", "cheaper", "luck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209743.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476156, "question_id": "NHMEepD3pT3cWpqgJzGhTZ", "question": "What is the main ingredient in the brown item?", "choices": ["sugar", "jam", "fruit", "wheat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476156.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566775, "question_id": "NHeQtinbNvLAjpezwjZC7M", "question": "What British artist is known for paintings in the style of this shadow?", "choices": ["tracy emin", "banks", "freud", "hirst"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566775.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80211, "question_id": "NJPD4fzUV8u6D7b63XszZL", "question": "Why is the water brown?", "choices": ["pollution", "muddy", "algae", "dye"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 423206, "question_id": "NKQ6XBa9bN56o3kkFPcVoX", "question": "How did the bear get on top of the furniture?", "choices": ["hiding", "fell", "climbed there", "placed there"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000423206.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477992, "question_id": "NKZiDyGtFHyFvgnTLzKAWW", "question": "Where does this animal usually hang out?", "choices": ["underground", "pouch", "nest", "cave"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000477992.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156996, "question_id": "NKm6bWjHASGVKmpcUhVBe4", "question": "Where are these zebras located?", "choices": ["water", "circus", "wild", "zoo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474549, "question_id": "NKzXwddmXRCHCRj6qLtjHP", "question": "What type of building is shown?", "choices": ["church", "barn", "lighthouse", "skyscraper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474549.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11864, "question_id": "NL4PpQiWaR3bJn78X8gxCB", "question": "What is likely to be the next meal of the day eaten at the upcoming time for it?", "choices": ["midnight snack", "lunch", "dinner", "supper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000011864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 502904, "question_id": "NL97e8BqEDenonBbTABVbN", "question": "What material is directly below the pizza?", "choices": ["cardboard", "metal", "glass", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000502904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195511, "question_id": "NMTs8Agir5JM9w9KheTNPG", "question": "What is this style of attire for?", "choices": ["sleep", "swimming", "exercise", "work"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180566, "question_id": "NN2KMPR2fDBWNk99bDGYzK", "question": "What are his pants described as?", "choices": ["jeans", "dress pants", "capris", "cargo pants"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87431, "question_id": "NND66mNK4CyKPPTCu6nXPT", "question": "How many types of skating is there?", "choices": ["three", "five", "two", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087431.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433692, "question_id": "NNEF4njE8yN78Y7BK8g3PV", "question": "What material is the desk made of?", "choices": ["wood", "metal", "marble", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433692.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129238, "question_id": "NNS3MK5vFhwAkghArHhRUV", "question": "What is the tennis racquet touching?", "choices": ["ladder", "bird", "apple", "basketball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129238.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212120, "question_id": "NNhZ93eSe4xrxnCPjqaGFA", "question": "What is on the ice?", "choices": ["polar bear", "man", "goal post", "hockey puck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000212120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22934, "question_id": "NNrNLwCkVNxRKXdoDsdt5u", "question": "Which one of these functions can the machine to the left of the monitor do?", "choices": ["cool air", "print", "give light", "broadcast tv"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022934.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198099, "question_id": "NPXQQKHvSK8GvJZaUMwWu2", "question": "What is the tone of the image?", "choices": ["sepia", "colour", "greyscale", "vibrant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198099.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324257, "question_id": "NQtTFj3oTaHbW98VtqnCSN", "question": "What sort of vehicle is likely the next one taken by persons riding on this bus?", "choices": ["uber", "bike", "train", "airplane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000324257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522609, "question_id": "NRF4fpenhoHvQjXnm3xST7", "question": "What made the tracks on the ground?", "choices": ["plants", "cars", "elephants", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522609.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263001, "question_id": "NSHqekTDZ5xDkMhnVWHeTi", "question": "What is the color of the toilet cleaner bottle?", "choices": ["yellow", "blue", "red", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263001.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125206, "question_id": "NSdbtXWgmSZDGA6pLnMs5t", "question": "Which one of these would be beneficial to this body part?", "choices": ["drops", "shampoo", "lotion", "polish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125206.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 165304, "question_id": "NShjeZ6SLFkoAJ3vuhqrWB", "question": "What food group is the baby eating?", "choices": ["fruits", "vegetables", "meat", "dairy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000165304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211542, "question_id": "NSifAKAThU4vXXTQ7aduQr", "question": "Who bought this bus company in 2017?", "choices": ["warrington's", "transdev blazefield", "greyhound", "peter pan"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000211542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92019, "question_id": "NVFsYhaC95ykZ8RAuHfif2", "question": "What is the plate made from?", "choices": ["glass", "plastic", "wood", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092019.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108522, "question_id": "NVwZJKM7yTcC5iWBMKfkku", "question": "The owner of this toy might want to be what when he grows up?", "choices": ["lawyer", "astronaut", "artist", "construction worker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108522.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39164, "question_id": "NXFKPS7L4MDVDmRSMBTQ8V", "question": "What are their young referred to as?", "choices": ["joey", "kids", "calf", "cub"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000039164.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477728, "question_id": "NY6nc6M3bAXY6fw7ZHJ7TQ", "question": "Which children's movie character is most similar to this animal?", "choices": ["madagascar's marty", "madagascar's alex", "stitch", "shrek's donkey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477728.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256185, "question_id": "NYcEiDwjDYYeFSMCzG8cs2", "question": "What material is the blue and white item in the middle of the room usually made of?", "choices": ["sticks", "brick", "wood", "porcelain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000256185.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100922, "question_id": "NYoFNgqpc2gUHiVWtpnCFF", "question": "What type of animals are eating the grass?", "choices": ["pig", "cow", "gorilla", "zebra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297849, "question_id": "NaoRj7oRuysP7My6d89Rcz", "question": "What type of destination is shown?", "choices": ["coastal", "rural", "safari", "arctic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297849.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37056, "question_id": "NaugUDw94sRyDw4cWrw9Fb", "question": "This type of performance is often seen in what entertainment venue?", "choices": ["circus", "aquarium", "theater", "stadium"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391007, "question_id": "NcVGx5i6rSLjASrSimb9yJ", "question": "Where is the money being stored?", "choices": ["luggage", "mattress", "bank", "fish tank"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391007.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439475, "question_id": "Ncg4NsiXGjmxfHUCSbTHoJ", "question": "Which one of these names would fit in with the two displayed here?", "choices": ["serena", "dominique", "tiffany", "melissa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448811, "question_id": "NcpeMmEfP2gtgmavW2ZHky", "question": "What is the person wearing?", "choices": ["backpack", "baseball cap", "goggles", "armor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448811.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406367, "question_id": "Nf4hXrSmGCBRPeeqVSRb4f", "question": "Why is the bear standing up?", "choices": ["reading", "drinking", "eating", "hiding"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000406367.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57565, "question_id": "NfJz6bdp2Cf66CjcyBzaZb", "question": "The place settings here could be termed as what?", "choices": ["festive", "restrained", "non existent", "staid"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000057565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321379, "question_id": "Nfa74z4LpdRE7RRfEPs58T", "question": "Where is the cat seated?", "choices": ["handlebar", "tire", "basket", "bike seat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321379.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550433, "question_id": "NfupuXvoyzJEhoSNoDsFfm", "question": "The light sources on the wall would normally be where?", "choices": ["in floor", "night stand", "on bed", "in window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544270, "question_id": "NfvApSoowhiRnx3yxstL33", "question": "Who would lose money if one of them escaped?", "choices": ["car salesman", "shepherd", "cowboy", "horse wrangler"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544270.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 239962, "question_id": "Ngp4WxJNssXYiPtdJA7WVU", "question": "Which door must be open on the refrigerator if someone wants to add ice to their glass?", "choices": ["left", "right", "none", "bottom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000239962.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367054, "question_id": "NhiCURnKfdQrt77rkiBgKG", "question": "What kind of bathroom is this?", "choices": ["plane", "home", "school", "hotel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82717, "question_id": "NiDE6kkXMuyrE9mbkGURHM", "question": "What kind of a shirt is the man on the right wearing?", "choices": ["long sleeve", "plain", "concert", "short sleeve"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082717.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 248635, "question_id": "NjExDNFm4ov7x4XAFrq7rc", "question": "What part of the rightmost broccoli has been cut down the middle?", "choices": ["bud", "crown", "stalk", "floret"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000248635.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557115, "question_id": "NkLrznkL9jhpxRZte7rzDD", "question": "Why is the colorful strip on the luggage?", "choices": ["easily found", "stylish", "hiding", "vandalized"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557115.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464530, "question_id": "NmYp9TkXpNekDTSHAFNunN", "question": "What is being transported in wires above this creatures head?", "choices": ["horses", "electricity", "nothing", "gas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344542, "question_id": "Np3YtUMdwzyR82NaajWVfX", "question": "What could this man most likely be getting ready for?", "choices": ["clean house", "wedding", "basketball game", "skiing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456620, "question_id": "NpQYC2TMKxYrNrwXxJsyzw", "question": "What caused the two white items to become misshapen?", "choices": ["friction", "fall", "cold", "heat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456620.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125326, "question_id": "Nq9RkUi9m4ghF4CKR9wBuz", "question": "What type of phone is she using?", "choices": ["landline", "cellular", "pay", "rotary"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125326.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351670, "question_id": "NrcAxtLj5mfeKsEuMWJin2", "question": "How many people will likely be eating?", "choices": ["one", "four", "three", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351670.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81762, "question_id": "NspkXh29JvVboHe5qwAmgx", "question": "What Biblical character is often depicted with this fruit?", "choices": ["isaiah", "jesus", "eve", "john"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081762.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534221, "question_id": "NsxfdGcbmVj7DueB2CUvuU", "question": "What kind of material is the cake sitting on?", "choices": ["wood", "metal", "paper", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263290, "question_id": "NtcbR3WdDUdAZXAJKQM9rn", "question": "Besides English what language is seen here?", "choices": ["italian", "spanish", "french", "german"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263290.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490573, "question_id": "NucDAMY8Pqdzgipg5HkVdq", "question": "What type of pattern is seen at the back of the shoes?", "choices": ["stripes", "zig zag", "checkers", "dots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515906, "question_id": "Nug5f4XYr8BQv5GAUSPv89", "question": "What type of book is underneath the phones?", "choices": ["cookbook", "mystery novel", "manual", "stephen king"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351155, "question_id": "NvbcDVJapzuZXGav6LocJa", "question": "What sound is produced by the animal above?", "choices": ["neighs", "moes", "hisses", "chitters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351155.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470603, "question_id": "Nw9nAhuERhFE2bHeLAnAEJ", "question": "What does the gold topped bottle most likely hold?", "choices": ["beer", "water", "wine", "juice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470603.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29503, "question_id": "NwG9eP4qCe52aSQo5TNenH", "question": "What would be a more practical mode of transportation for the police officer?", "choices": ["monster truck", "bike", "scooter", "donkey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029503.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454461, "question_id": "NwJsWXQZWegWZK5rkLYUKH", "question": "What is this laptop being used for here?", "choices": ["zoom call", "data entry", "turking", "sales call"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454461.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93529, "question_id": "NwjjoHs5SeGQJKYZmnKhKh", "question": "What species of animal is being held?", "choices": ["canine", "rodent", "fake animal", "ursine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093529.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221285, "question_id": "NxrYs2rMF6cKBG8J9T6fVJ", "question": "What is this type of window called?", "choices": ["slider", "picture", "awning", "bay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25781, "question_id": "Ny2FLRkfTuFWECeMaXFh4Y", "question": "These tracks are set up for use by what type trains?", "choices": ["none", "coal only", "wood fired", "electric"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508903, "question_id": "NzhtbFATGePkCV6Pi6vUMv", "question": "What sort of occasion is the man shown here prepared for?", "choices": ["business professional", "beach party", "dive bar", "airline host"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000508903.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402517, "question_id": "P2SQbWYsCKyUAmb6W4yzeT", "question": "What is the complimentary color of the bench's color?", "choices": ["white", "pink", "orange", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402517.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131141, "question_id": "P3eYNsHjKsyLwZ4NaNvjpN", "question": "What grass family sweetener is used to prepare the item shown here?", "choices": ["beets", "rye", "sugar", "stevia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000131141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 530659, "question_id": "P5XbLVvi4pLH47ZG3ZHfV9", "question": "At least how many different signs are attached to this pole?", "choices": ["five", "three", "two", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000530659.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569118, "question_id": "P5bdAYPtXP6yRhPUHEsYst", "question": "What are they standing in?", "choices": ["water", "snow", "sand", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483121, "question_id": "PAdvs9KQvKAHhCpzqrtUj9", "question": "What is the elongated contraption coming down from the roof used for?", "choices": ["ventilation", "entertainment", "light", "cooking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483121.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24977, "question_id": "PAszpVnQ7qU6qAM6nxvDSn", "question": "What type stove is shown here?", "choices": ["electric", "microwave", "propane", "gas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207285, "question_id": "PBeMTDoB8mTAhg5jfefVLy", "question": "What is the person pictured above doing?", "choices": ["riding", "surfing", "skating", "walking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316297, "question_id": "PC9DUToJxqqUv9njUfwgfN", "question": "What is causing some of the flowers to grow downward?", "choices": ["animals", "wires", "rotational growth", "gravity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316297.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513770, "question_id": "PCTM5ANi8Y6dbTJCGJWWnz", "question": "What has caused the water to give off a pink shine?", "choices": ["algae", "flamingos", "paint", "shrimp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513770.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37914, "question_id": "PCZo6gKgzjs7YZibzaASAL", "question": "Where is the most likely place for this trail?", "choices": ["mountain", "seaside", "jungle", "valley"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529194, "question_id": "PDAbP5ZeUAZ6Qd6pTkmykr", "question": "From the clouds in the sky above the clock tower what type of precipitation is likely to occur shortly?", "choices": ["snow", "hail", "rain", "sleet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354811, "question_id": "PDpG2V3VpzdcKMkiQYZFHa", "question": "What is the small blue flower with the white center called?", "choices": ["hyacinth", "zinnia", "carnation", "orchid"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000354811.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109861, "question_id": "PE36khwJFhKS8Y3nMFudLd", "question": "What environment do these birds most enjoy?", "choices": ["grassland", "jungle", "desert", "tundra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109861.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 70634, "question_id": "PMahHakpLtA378fMaMDjXP", "question": "What are the yellow circles on the front of the vehicle used for?", "choices": ["boarding", "steering", "stopping", "light"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000070634.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 313433, "question_id": "PPUuJGkwiF2M2sAq7UFLwD", "question": "What type of dog is shown?", "choices": ["golden retriever", "rottweiler", "siberian husky", "poodle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000313433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98176, "question_id": "PPbyNDBdhRcauspNnzbpUX", "question": "What letter in the alphabet comes after the letter on the racquet?", "choices": ["m", "x", "t", "w"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098176.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 538998, "question_id": "PPqGxYHWUyTGAmqNv8oPsG", "question": "Where is this horse located?", "choices": ["field", "barn", "street", "circus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000538998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376419, "question_id": "PQtyy95wjRDYTF2oiKazan", "question": "In which English county might you find a town that has the same name as the word after the W on the sign?", "choices": ["cornwall", "cumbria", "essex", "lancashire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74108, "question_id": "PR7A7AfEy7TzyhMyFgzsRn", "question": "What number is closest to the number on the pole?", "choices": ["143", "807", "225", "653"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074108.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416939, "question_id": "PRF5fPGFkcG7KWAmfiLuyi", "question": "What might explain why the man in the background is crouching over near the bushes?", "choices": ["creeping", "gardening", "hiding", "playing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000416939.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86744, "question_id": "PRNjCmoXsLi66D6w38BSfv", "question": "What is the activity the man is performing called?", "choices": ["surfing", "windsurfing", "kiteboarding", "parasailing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086744.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566322, "question_id": "PRj9kowaZvNvowhDK4aD7x", "question": "What country's flags are at the top?", "choices": ["canada", "mexico", "united states", "ireland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566322.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145735, "question_id": "PT7KTGh42mMmmuMUZZZzJA", "question": "What kind of meat is in the sandwich?", "choices": ["riblet", "salami", "beef", "hot dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290299, "question_id": "PTgfMXGUJKBqK7fVjsd3xm", "question": "What agent acted on this laptop to close it partially?", "choices": ["maid", "owner", "passerby", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290299.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89842, "question_id": "PTwHuwNECzSwb9iuqWnEwQ", "question": "How has this food been portioned?", "choices": ["shred", "cubed", "diced", "sliced"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089842.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151810, "question_id": "PVPHAsvMu8hGCHqPR7weBA", "question": "How is the smaller animal related to larger?", "choices": ["none", "offspring", "zebra inlaw", "enemy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151810.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321695, "question_id": "PVYVskvvT49pQ33omJccJQ", "question": "About what temperature is it here?", "choices": ["20 f", "130 f", "90 f", "32 f"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321695.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554511, "question_id": "PWK7iBCf944HCaocrvj5HG", "question": "What is the bear laying in?", "choices": ["water", "couch", "hammock", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000554511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414587, "question_id": "PWeET8miWVCFYJJkVHao6g", "question": "Where is the real version of the item found that appears as a sticker on the basket?", "choices": ["cave", "underwater", "right thigh", "human chest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414587.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183471, "question_id": "PXpkoXURPBLe8xJXC4FkBW", "question": "If he falls what will he land in?", "choices": ["sand", "kite", "water", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183471.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 134753, "question_id": "PXvsx9CnG2iMioaCR82tnG", "question": "How is this space heated?", "choices": ["radiator", "forced air", "fire", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000134753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570421, "question_id": "PYnBYP8HPwMbzxSVYyxVfx", "question": "What does the building resemble?", "choices": ["castle", "brownstone", "apartment", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570421.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415729, "question_id": "PZEhNFQDN22zZuxTw2Rddw", "question": "What do you get if you add ten to the number on the shirt?", "choices": ["55", "36", "40", "12"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415729.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470755, "question_id": "PZLTgoKzsDXR3bU3fMBUmU", "question": "What would most likely cause the giraffe on the brown surface to move?", "choices": ["bad weather", "kids playing", "incoming vehicle", "predators"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470755.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57204, "question_id": "PZg3UgYNjfiFLTg4reuvP2", "question": "What is the man using the umbrella to protect his head from?", "choices": ["rain", "sand", "waves", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000057204.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485646, "question_id": "PaPDDixriMcAX5KLvrqVKZ", "question": "What material was the first computer mouse made of?", "choices": ["plastic", "carbon", "wood", "steel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95570, "question_id": "Pafji2sGzgtoywT4uGAstE", "question": "What is most effective if you see a child run in front of you car at this location?", "choices": ["scream loudly", "curse", "honk", "stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 405946, "question_id": "PbBybFkN6uQHzDGMqQnM4n", "question": "What type of object is in the yellow bowl in the coup that the white cat is attempting to get?", "choices": ["water", "food", "chew toys", "yarn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000405946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508648, "question_id": "PbjVADepdBmxd6tc9Zf7jW", "question": "The colors on the hydrant resemble what?", "choices": ["dalmatian", "papaya", "crow", "candy cane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145535, "question_id": "PbjovssJ9hKde663kyPX4o", "question": "The cats huddle here why?", "choices": ["mice food", "beverages", "mouse pad", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51420, "question_id": "PcxZDEChYtBrt7Jvfd4noa", "question": "Which one of these companies makes items that would have a similar taste to these?", "choices": ["stanley", "hershey's", "black decker", "nasoya"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051420.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336282, "question_id": "PdTKJPamMpRKbUVyiUeujR", "question": "What is the woman pressing?", "choices": ["button", "weights", "dough", "juice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 208488, "question_id": "PddEkZJK7orghjPrcFW5Cp", "question": "What country does this plane originate from?", "choices": ["italy", "england", "germany", "greece"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000208488.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466709, "question_id": "PdwsXhYTVMQvqb85KDZJsn", "question": "What is the man currently using his shoe for?", "choices": ["running", "beer holder", "walking", "soccer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 48592, "question_id": "Pe9HbVpmkgL5roVjCKKA4v", "question": "Why carbon fiber is used in snowboard?", "choices": ["flexibility", "gravity", "grip", "force"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000048592.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25303, "question_id": "PeLdFdMLMkaqhRbncoFFp9", "question": "What flavor are pink donuts?", "choices": ["vanila", "chocolate", "scotch", "berry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146486, "question_id": "PfvD3MUgf2497Upc8qUu4Z", "question": "What usually goes into the white basin?", "choices": ["water", "flowers", "sand", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146486.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 398348, "question_id": "PgkKVyT5AeJd4Mw24gB5dJ", "question": "What writing are you most likely to see on the train?", "choices": ["6732", "cn", "train", "bnsf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000398348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488669, "question_id": "PgkWYkqqDKFnK8ZkRy4DPw", "question": "What is the name of this dish?", "choices": ["cinnamon tart", "cheesecake", "apple pie", "quiche"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488669.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328795, "question_id": "PiSJLq6RyFGmCiECteyZH3", "question": "What is the person riding?", "choices": ["goat", "horse", "surfboard", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328795.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139770, "question_id": "PifH4o5obrDmbtYUBpiLNM", "question": "What are the shoes he's wearing for?", "choices": ["basketball", "skateboarding", "running", "surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139770.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267107, "question_id": "PjVUpNZDRHmkfFkUCrCxD2", "question": "What facility is shown here?", "choices": ["library", "office cubicle", "none", "mall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267107.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 165304, "question_id": "PkMfyTP6B8jyo7HABMAf9K", "question": "Which utensil will he start using first?", "choices": ["spoon", "fork", "chopsticks", "knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000165304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 423661, "question_id": "PkyDmNXbtf7UWHrHQQXiFV", "question": "This would be a good gift for a child who wants to go into what field?", "choices": ["engineering", "medical", "athletics", "horticulture"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000423661.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297057, "question_id": "PmYJ25fZ26a2wAsL62Nb98", "question": "The person who goes first in the game played in this room does what?", "choices": ["wins", "breaks", "flip coin", "twirls cue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155084, "question_id": "PmhERWw647NFjcsxXhSocn", "question": "What kind of a setting is this?", "choices": ["beach", "daytime", "zoo", "nighttime"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449589, "question_id": "PnTBzm3KT5P5i3THFA2PwA", "question": "Which type of zebra is in the above picture?", "choices": ["plain", "mountain", "none", "grevy"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000449589.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481249, "question_id": "PnyGPsVbvpbKZq6d2mJraG", "question": "How did persons in this valley now get there?", "choices": ["train", "bike", "camper truck", "motor cycle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565145, "question_id": "Po9Wc5qbsz9gMnXPbimNvy", "question": "What is this man likely recording?", "choices": ["construction", "proposal", "traffic", "another skateboarder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565145.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22055, "question_id": "PoFZbxyNPi3vpiotR3e68W", "question": "What did the man do to his hair?", "choices": ["comb", "laser", "shave", "grow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022055.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338409, "question_id": "PoHKrEiBen3RzieoptnBAn", "question": "Which cities fire department uses the red round item?", "choices": ["mexico", "new york", "okla", "santa fe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000338409.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463871, "question_id": "PoHjGtwM9W7zeKQATVipVu", "question": "What is the item in the center of the building tower used for?", "choices": ["predicting weather", "flinging apples", "telling time", "deterring pests"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 322087, "question_id": "PoXMuo3ztwHt6oaRUZNejz", "question": "The fruit here is cut in what manner?", "choices": ["moth cut", "none", "bisected", "chopped"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000322087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123394, "question_id": "PpTAcwkM58XzoWs3LgnoKF", "question": "What was done to the sign that changes the messages meaning?", "choices": ["shape", "paint", "vandalism", "location"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215267, "question_id": "Pr2auXfLEBBqy5ENdGkJxq", "question": "Her headgear is useful in case of what?", "choices": ["headache", "fall", "cold", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000215267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24114, "question_id": "PrfsRMFfZYzod2kQuhdCJK", "question": "What can the objects on their heads be used for?", "choices": ["fighting", "mating", "grazing", "drinking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024114.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439408, "question_id": "PtUfsVxRsixptdYGTFhwFP", "question": "What purpose does the train still serve for people?", "choices": ["wind block", "historical", "aesthetics", "transport"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439408.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 580153, "question_id": "PtXPL4FJJ8YiVXw6QUXinx", "question": "How was the picture of the person here oriented for display?", "choices": ["rotate 360", "rotate 45", "rotate 180", "rotate 90"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000580153.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229693, "question_id": "PuBHBiZ9v35s3uV5DAPLGz", "question": "How many different flavors of Rock star energy drinks are there?", "choices": ["25", "14", "32", "15"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229693.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6045, "question_id": "PvtbG5LcyrY4dazKFTYghe", "question": "What is the name given to this type of bus?", "choices": ["cargo bus", "luxury bus", "double deck", "school bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006045.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549681, "question_id": "PxHuKM6qXj7xSDD788T4qy", "question": "This train is carrying what now?", "choices": ["money", "vacationers", "dogs", "freight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254530, "question_id": "PxuLETida4yZy5tDsvvY8E", "question": "The item the man is holding protects against what?", "choices": ["weather", "vampires", "earthquakes", "mad cows"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221986, "question_id": "Py4sGm3ronCVo5GwqpoV4f", "question": "This animal is classified as a what?", "choices": ["herbivore", "omnivore", "cannibal", "carnivore"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221986.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314548, "question_id": "Pz6WrF5gmzvBZHcrQPTyfS", "question": "The person is holding on to what?", "choices": ["lasso", "string", "collar", "basket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314548.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184593, "question_id": "Q2LJT2syPBUMHWgoyrVFCg", "question": "What material is used to surround train tracks?", "choices": ["asphalt", "cloth", "ballast", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184593.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206813, "question_id": "Q2emsWomh7m4ijmR2DevQ7", "question": "Is it semi modular kitchen?", "choices": ["ultra", "yes", "no", "deluxe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450435, "question_id": "Q2fppXyFnzosVpbyEucaqM", "question": "What are these devices intended to tell you?", "choices": ["speed", "time", "temperature", "date"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000450435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197149, "question_id": "Q3mvBFpnRZrjHN97sQNCXS", "question": "What habitat is this?", "choices": ["river", "tundra", "marsh", "savanna grassland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197149.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 509670, "question_id": "Q5HXXTxWt66yWpkAqfjzNL", "question": "What material are these bottles made of?", "choices": ["paper", "glass", "aluminum", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000509670.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372818, "question_id": "Q5N6AZyfjqgqbg57GjGCao", "question": "What type person sleeps here?", "choices": ["man", "nobody", "lady", "elderly uncle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372818.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376425, "question_id": "Q5ST3GQJwAacmsfqMTN2vj", "question": "This zebra must love eating what?", "choices": ["cereal", "grass", "apples", "leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179601, "question_id": "Q5bcVaUC3n7DajBsAbLDxA", "question": "If someone wanted to go somewhere to read which way should they turn here?", "choices": ["straight ahead", "right", "left", "uturn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45319, "question_id": "Q6TLVWkAyWC4xwdCbJgf2b", "question": "On what type of surface is this cat laying?", "choices": ["table", "bed", "mud", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045319.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316896, "question_id": "Q7Fa47Amc95oK93wvWenUW", "question": "What is connecting the mouse to the laptop?", "choices": ["electrical wire", "sound waves", "rf", "magnetic force"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000316896.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93265, "question_id": "Q7NFi8vhiTw6kfoMzLuwRH", "question": "What protective gear should this person wear?", "choices": ["ear muffs", "knee pad", "helmet", "goggle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433681, "question_id": "Q7i2x6bALevtJgi3DmhZap", "question": "During which time of the day is the cat in the luggage pictured?", "choices": ["noon", "night", "morning", "afternoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311198, "question_id": "Q86f6NaXqrowQNNR2kmcET", "question": "What is needed to cook the yellow colored food in this bowl?", "choices": ["air", "sugar", "rocks", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558261, "question_id": "Q9qmd3gk6pRtbFoKwPQhaZ", "question": "What kind of heating is setup in the room?", "choices": ["central heat", "forced air", "liquid heating", "radiation heat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34255, "question_id": "QA6mjUMHiCePXD2voGceQ2", "question": "How many people would be needed to fill all these benches?", "choices": ["hundreds", "dozen", "two", "one"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000034255.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 405917, "question_id": "QCEkbH3KNJRK4EHL7jwSuA", "question": "What country is this intersection in?", "choices": ["england", "sweden", "germany", "russia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000405917.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286652, "question_id": "QCc4qJJR87yBkuqXparA7j", "question": "Which classic Greek figure is this picture reminiscent of?", "choices": ["hippolyta", "athena", "narcissus", "odysseus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286652.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36894, "question_id": "QDZQZSLnJFrtJV4xeWHr5F", "question": "What object is the cat hiding under?", "choices": ["remote", "post-its", "headphone", "router"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424414, "question_id": "QDbFJnWCn93YZungCbqDBZ", "question": "Cutting down the blind spots as long as the object is in their focus in which mirror?", "choices": ["side", "none", "back", "front"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510878, "question_id": "QDiAGccmvU8tpyzhpW6Szs", "question": "What part of her body is being protected with equipment?", "choices": ["head", "elbow", "hip", "knee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510878.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353708, "question_id": "QE5tmD24fKQBsK273DLw5K", "question": "The border design is made of either real or fake what?", "choices": ["copper", "gold", "silver", "diamond"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366800, "question_id": "QEDtDrDJ2NWn9uv8bhFABU", "question": "What food item is manufactured from these animals?", "choices": ["honey", "milk", "pork", "silk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366800.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368804, "question_id": "QEfTwnQrwkmywgE6pd87io", "question": "What would be a good replacement for the marbles?", "choices": ["water", "nails", "pasta", "coins"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368804.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226094, "question_id": "QEv2jhhHsPC7x2MpL9gNZL", "question": "The mechanism here does what to water?", "choices": ["purifies it", "heats", "chills it", "filters it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226094.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495787, "question_id": "QFz7TwNKF497DLS5z4z4z8", "question": "Why are so many zebras in the water?", "choices": ["cooling off", "fighting", "hiding", "going home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495787.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402517, "question_id": "QJfyZzPAVyN9zJSgf2TxXK", "question": "What type of location is on the left?", "choices": ["school", "park", "greenhouse", "garden"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402517.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96736, "question_id": "QJwTj76K5MUjE5MeE9ieZY", "question": "Those dark silhouettes amid the turquoise are made by what?", "choices": ["books", "people", "paint", "statues"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096736.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569882, "question_id": "QPfULRTBnTRDShMGgtFjCt", "question": "What would one expect to get in this establishment?", "choices": ["donuts", "personal training", "book", "haircut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569882.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131975, "question_id": "QPy8uRMLq8m9P7rKfQ6ro2", "question": "What kind of outfit is the man wearing?", "choices": ["hoodie", "tuxedo", "t-shirt", "wetsuit"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000131975.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 239344, "question_id": "QQWS9PevyifDmWRi9WD7SN", "question": "What is the large green item?", "choices": ["saucer", "baseball bat", "bean", "crown"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000239344.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347518, "question_id": "QQsFUTbKjmrAzJuLeVpFT5", "question": "What vehicle is the suitcase being loaded into?", "choices": ["car", "boat", "bus", "plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347518.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309931, "question_id": "QRTX5VvpUi4xcFxVKZiDcS", "question": "What can be said about the substance coming out of the giraffe?", "choices": ["frozen", "liquid", "solid", "melting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309931.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54170, "question_id": "QRZTa5ojBfN5tLCxg896gM", "question": "What musical instrument is most visible?", "choices": ["harp", "hammond organ", "pipe organ", "piano"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129734, "question_id": "QTMZ9PuQbyhCfEdoiGa36p", "question": "What is the smallest number that appears on the side of the plane?", "choices": ["three", "six", "one", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129734.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207456, "question_id": "QVQGdPgQaQVbCTzune9PvD", "question": "What is the car parking size in meter?", "choices": ["1.5x2", "3.5x4", "2.5x4", "2.5x5"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000207456.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548427, "question_id": "QWUVjuTebgPFjWRuHmNoX5", "question": "What might be written on this cat's collar tab?", "choices": ["owners ssn", "it's spouse", "it's name", "diet preference"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399056, "question_id": "QWYp5HmqV2gmkNVbykARF2", "question": "What part of the house is this?", "choices": ["bedroom", "kitchen", "loft", "bathroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493098, "question_id": "QZf7j9UkzSE9y23kXBe9hM", "question": "What danger might fliers on this plane face today?", "choices": ["overheating", "birds", "road rage", "icing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493098.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 408704, "question_id": "QZyKgQPnKRUkxA5dhgjXej", "question": "If this truck needs to empty it's bed what part might lower by mechanism?", "choices": ["cab", "front", "rearmost", "wheel"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000408704.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275447, "question_id": "Qa9JyM2nXxe6pKgkPYHdQi", "question": "The woman is riding the white horse during which time of the year?", "choices": ["summer", "spring", "fall", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164968, "question_id": "QabvbiY9ZbdGQZLAaKDthy", "question": "What design are the guys underwear?", "choices": ["solid", "stripe", "hearts", "circle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112433, "question_id": "QbwXP6QPcBB4MFFXScHQis", "question": "What does the person have on?", "choices": ["jacket", "handcuffs", "dress shoes", "tiara"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311778, "question_id": "QdsQ6d7VB4SucizophBASu", "question": "Who placed the cat in the bag?", "choices": ["it's inlaws", "mother", "cat", "torturer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66777, "question_id": "QePpn7ZuxPznPAFAt3hvnH", "question": "What shape are the skis making?", "choices": ["octagon", "cross", "hexagon", "circle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138889, "question_id": "QfAiFWVCWcrg793w9hjc4P", "question": "What is the symmetric shape of this kite?", "choices": ["delta", "bow", "hexagonal", "box"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138889.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468881, "question_id": "QfM6b8MGHDHiDBYWMcTwfg", "question": "What kind of phone is this?", "choices": ["samsung", "nokia", "blackberry", "motorola"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374619, "question_id": "QgcDdT8r7wRLLw6FvZZTxX", "question": "What type of animal is shown?", "choices": ["aquatic", "flying", "prehistoric", "reptile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374619.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267483, "question_id": "QgxUMtnqvXtEQvNxXMEYrq", "question": "About how much of this pie is likely to be taken home to eat later?", "choices": ["3/4", "none", "all", "half"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267483.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303460, "question_id": "QhfdvqtnQqcAGqRUJpSgCz", "question": "What kind of location is this building in?", "choices": ["wilderness", "oceanside", "city", "mountaintop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567883, "question_id": "QhspmuQeyWdGp6dW9CyULQ", "question": "What company is known for making the item behind the cat?", "choices": ["mcdonald's", "dell", "green giant", "tesla"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567883.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528419, "question_id": "QiazTuswN7pAES9EjNTq6H", "question": "What type bird is shown here?", "choices": ["sparrow", "woodpecker", "hummingbird", "chicken"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424762, "question_id": "QkN9LxvviWkzht3GJmSZfw", "question": "What word describes the number of animals here?", "choices": ["duo", "quartet", "trio", "battalion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424762.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309931, "question_id": "QkYdZokX3myMaiUQxc8rZJ", "question": "What is the giraffe doing?", "choices": ["eating", "sleeping", "drinking", "peeing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309931.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73486, "question_id": "QmGteiRMeJcKy84h7xYnwT", "question": "Which animals are the people above riding n?", "choices": ["none", "horse", "donkey", "zebra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073486.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164451, "question_id": "QmSipTZ4V9GrEeftP7pFRh", "question": "How many years old is this building?", "choices": ["120", "269", "150", "75"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164451.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5836, "question_id": "Qn3B8X3WHUyH5F2TXRTojb", "question": "What time is shown on the clock?", "choices": ["midnight", "noon", "300", "442"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005836.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518062, "question_id": "QnYM6SrK2ZJxPLBNKMNg3r", "question": "What are the pans in front of the white blender made from?", "choices": ["aluminum", "copper", "steel", "cast iron"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120175, "question_id": "QoDE9dCPqhcbGkSfochsLE", "question": "What is the clock made of?", "choices": ["wood", "paper", "cardboard", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120175.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65470, "question_id": "QqNbQw59Tj7TaUpxW3EGNh", "question": "Who is allowed to use this bench?", "choices": ["bears", "anyone", "over 65", "ticket holder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065470.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36946, "question_id": "QqkYaVqWdKhawGoCr7ijBe", "question": "What type of environment is this?", "choices": ["savanna", "wetland", "desert", "rainforest"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000036946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 479276, "question_id": "QsfS7nGZLp89xRBaaA2a62", "question": "Who installed this contraption?", "choices": ["animal control", "hunter", "park ranger", "homeowner"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000479276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357876, "question_id": "QtTbr6D2QP3JqXJhZd9KK2", "question": "How does the television receive its content?", "choices": ["broadcast antenna", "dvd", "cable", "streaming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357876.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65230, "question_id": "QuASEcnNjBC7dK8bprxs7n", "question": "How many toilets have the lid down?", "choices": ["two", "four", "one", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260069, "question_id": "QuAupX7AjeUM2Juu2kK6Tk", "question": "How many buses are in the image?", "choices": ["three", "ten", "one", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436322, "question_id": "QugmtCPcrr8ziZzDtRuU7K", "question": "What is the person holding on his hand?", "choices": ["bat", "nothing", "skateboard", "rubber"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436322.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98254, "question_id": "QukZmh99TRk95vwJ26p6Vy", "question": "What company is known for making vehicles like this?", "choices": ["suzuki", "nintendo", "tesla", "ibm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098254.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466778, "question_id": "QvLecoaF92Yx5tRs492Bsv", "question": "What is needed for this activity?", "choices": ["sandals", "skis", "skates", "board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 18646, "question_id": "QvTgxkgpJirsSH2NXFNsJs", "question": "What animals are in the photo?", "choices": ["rhino", "kangaroo", "giraffe", "bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000018646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378332, "question_id": "Qws7yvLAh9Ag672zqxF8sk", "question": "Which celebrity would the shoe likely belong to based on their name?", "choices": ["jim carrey", "tom sizemore", "john wick", "jessica alba"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378332.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253719, "question_id": "QyYDFVcehjmgaGj3DVQfgm", "question": "What number do you get if you add both numbers on both shirts together?", "choices": ["35", "29", "77", "68"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000253719.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68831, "question_id": "Qyq5KdbyqHn8oeLxtdE8Xy", "question": "What is similar to what is happening here?", "choices": ["dog sledding", "water polo", "shuttle launch", "brain surgery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068831.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31701, "question_id": "Qz4fpa5Lm8FWaxA5nJAhVg", "question": "What insect is most likely to pollinate the items on display here?", "choices": ["white flies", "aphids", "bees", "ants"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031701.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353537, "question_id": "QzCbTQHzkeZFVHQ528XxBN", "question": "What brand are the shoes?", "choices": ["nike", "reebok", "adidas", "new balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353537.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271086, "question_id": "QzugcueAmhWYEaM5Wn5zX3", "question": "The item in the window cell has what to contain the bouquet?", "choices": ["cup", "jug", "vase", "mug"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271086.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499471, "question_id": "R2EZJnQHMZw6aMT36UKn5y", "question": "What is the man wearing?", "choices": ["speedo", "wet suit", "sweatshirt", "tank top"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499471.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532870, "question_id": "R36muJpi2Lffxo9x9v7RgS", "question": "What are the animals standing on?", "choices": ["dirt", "water", "grass", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532870.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395119, "question_id": "R3JsNZ6YqqGvGCMzNyzF5z", "question": "What might someone do in the red brick enclosure?", "choices": ["urinate", "pose", "bathe", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395119.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532870, "question_id": "R4PXgnd6R2Cy9aGU4qRXL6", "question": "These animals might be used for which sort of meat if butchered?", "choices": ["mutton", "chicken", "near meat", "pork"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532870.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 240958, "question_id": "R4bzFU38u9PLPKYoBxqUeV", "question": "What is a smaller animal that has the same colouration?", "choices": ["mule", "penguin", "beaver", "robin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000240958.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142180, "question_id": "R55dEmMaAZMVsCm3D4VL5a", "question": "What powers the light shown here?", "choices": ["electricity", "coal", "gas", "kerosene"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142180.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553423, "question_id": "R5ycCLPYZgkcTPUF4NpYmN", "question": "What method is used to dry hands in this bathroom?", "choices": ["paper towel", "terricloth towel", "hand dryer", "napkins"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96121, "question_id": "R66EAWpoxf3R2S4ZdaCoJz", "question": "What are these groups of zebra engaged in?", "choices": ["running", "breeding", "dying", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096121.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390604, "question_id": "R6ADDp9MuYMEKTn2oCX5tJ", "question": "What is the animal doing in this location?", "choices": ["eating", "relaxing", "stalking", "crying"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390604.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142373, "question_id": "R6hjo8YF9hdbvTJTJWFK2h", "question": "What process created this hat?", "choices": ["baking", "knitting", "debate", "exercising"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142373.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565145, "question_id": "R6sdBWt9gnjSQea7t9NTmK", "question": "What is he doing?", "choices": ["filming skateboard", "resting", "cleaning", "shaving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565145.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458062, "question_id": "R85c6Bt6ueHncLNwg94yzR", "question": "The slope here is suitable for whom?", "choices": ["none", "beginners only", "pros only", "intermediate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 62462, "question_id": "R9PYvmUNa2ZumPyqepguGp", "question": "What outdoor water activity is the man trying to do?", "choices": ["water ski", "water hockey", "freedive", "kite surf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000062462.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100823, "question_id": "RAHapPpNv2W2s9NgW9uuJ8", "question": "Why are the giraffes so hard to see?", "choices": ["are camouflaged", "cheap camera", "are hiding", "poor lighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110520, "question_id": "RANzWUMLBTu97DrnT8dN8J", "question": "What brand of phone is the pink one?", "choices": ["iphone", "nokia", "motorola", "samsung"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90580, "question_id": "RAYAyqaK2WAbTaiUJCTGPh", "question": "In what year was the film based on this book released?", "choices": ["2004", "2015", "1975", "1988"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470186, "question_id": "RAsafMqGgAnStkTTNLQFwh", "question": "What is on top of the pizza above?", "choices": ["meat", "eggs", "veggies", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470186.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17041, "question_id": "RC2tUeQ6cgTztsqRZh3SvD", "question": "What is the cable for?", "choices": ["stopping traffic", "random", "holding hydrant", "holding pole"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000017041.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30268, "question_id": "RDWRkfVcNjDvrihdfBUabh", "question": "What kind of information is this tower used for?", "choices": ["wind", "direction", "temperature", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030268.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153213, "question_id": "REEvB8k4rzkAivXM7AFm4W", "question": "What do the names on the white signs represent?", "choices": ["buildings", "streets", "girls names", "boys names"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153213.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 149466, "question_id": "RFBDJQj6HZK4CMoET7LxhA", "question": "What is the tool in the middle used for?", "choices": ["slicing", "walking", "cleaning", "digging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000149466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558373, "question_id": "RFCQxXEf6vTTSn3RP7an24", "question": "What is to the left of the people?", "choices": ["surfers", "ocean", "whale", "boat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558373.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 246520, "question_id": "RFJY5LZ9vavuVgU24FAuaj", "question": "What type of surface is this pizza located on?", "choices": ["stove", "shelf", "table", "counter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000246520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159419, "question_id": "RJA59PRHcDg4PMmkgeg25C", "question": "What has a bunch of sugar here in it?", "choices": ["cupcake", "bagel", "birthday cake", "salmon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519464, "question_id": "RJEoZ2iC4XHoAqaVaASDRU", "question": "What type of light is this?", "choices": ["head", "flash", "street", "traffic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55590, "question_id": "RJoFxEbC4cjea3hvwnyxtr", "question": "This traffic intersection requires stops by both cars and what?", "choices": ["joggers", "bicycles", "pedestrians", "trains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202294, "question_id": "RKJUEgaeBJy7n3AKdtpi8g", "question": "What is the warning symbol on the red bag called?", "choices": ["cross", "smoking hazard", "peace sign", "biohazard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202294.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45651, "question_id": "RKf3hSCFVwwioNyY6QH22t", "question": "What form of animal is this?", "choices": ["amphibian", "mammal", "bird", "reptile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045651.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553494, "question_id": "RLPsBdp8TFWWiQgsR5RzLn", "question": "What are the giraffes doing?", "choices": ["mating", "foraging", "climbing", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553494.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133322, "question_id": "RMH7kpLX4VRcHhH9XPuBPy", "question": "What is against the wall and is white in color?", "choices": ["candy cane", "wastepaper basket", "bunny", "rapier"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133322.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553732, "question_id": "RMUYGmQK35wamnb8Anckxb", "question": "What would primarily be done on the area covered in pillows?", "choices": ["sleeping", "eating", "bathing", "gaming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553732.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 388785, "question_id": "RPHNY7pWcBDszV5v4vt22i", "question": "What is needed for this activity?", "choices": ["water", "wind", "snow", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000388785.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540935, "question_id": "RPHesiHP2HRy5LxuT4Fe3W", "question": "What is the surfer trying to avoid by crouching down?", "choices": ["rocks", "wave", "sand", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540935.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161524, "question_id": "RPbEJZVJSFNprmiKTsF5rk", "question": "At the eatery ahead what option is out for today?", "choices": ["salads", "drinks", "al fresco", "chili"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31725, "question_id": "RPvcn6do5K6hm5YPhgZHNR", "question": "What is a feature of this animal?", "choices": ["quills", "trunk", "whiskers", "gills"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031725.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194819, "question_id": "RQNkEgUG4P6qcT9AL3htJj", "question": "What is included on the larger keyboard which is absent on the smaller?", "choices": ["number pad", "letters", "lights", "arrows"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000194819.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372447, "question_id": "RSuefZAtFgYkMLDMhz9yJm", "question": "What carved out the rocks into the current form?", "choices": ["water", "air", "dirt", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428302, "question_id": "RT9qgXEEkKWa4WVui3hco2", "question": "What wrestlers name is most similar to what is written on the man's shirt?", "choices": ["c.w. anderson", "blue meanie", "chris kanyon", "huh"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428302.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567202, "question_id": "RTKpaoM8FyGHHdFd9du95t", "question": "What type of hat is the man in the middle wearing?", "choices": ["beanie", "newsboy", "bowler", "cowboy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567202.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528967, "question_id": "RTPA65tw7dPZyrVYnbqiBz", "question": "What is he doing?", "choices": ["grabbing board", "falling", "eating", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528967.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 166538, "question_id": "RUuWgaCuTdK2KtebtDdpEY", "question": "Why does the giraffe allow the birds to sit upon it what are they doing?", "choices": ["fighting", "eating bugs", "nesting", "hitching ride"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000166538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129646, "question_id": "RW4798xxLyExQTPMNgYgyc", "question": "What is the arrow sign telling the drivers?", "choices": ["turn left", "no u-turns", "make u-turn", "turn right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80962, "question_id": "RWJKsQokaQqt6drMMaCuiR", "question": "What is right behind this man that is propelling him forward?", "choices": ["water", "heat", "person", "motor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080962.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346315, "question_id": "RWtKT9agFwSPzJ4fCYMVxV", "question": "What is the skateboarder about to do?", "choices": ["spin trick", "grind", "drop in", "air trick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301324, "question_id": "RXnhWirbJaguH5XpUSoZQJ", "question": "What is the water vessel in front of the car called?", "choices": ["dinghy", "kayak", "tugboat", "canoe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301324.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524203, "question_id": "RXw3fp5Xrd2pb98Ftgy4A9", "question": "What type of transportation is shown?", "choices": ["road", "air", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524203.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486881, "question_id": "RYmRNanhaB9efeuMTJ8k2G", "question": "What is a championship in the sport this man is playing?", "choices": ["grand slam", "polo award", "world series", "indy 500"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490973, "question_id": "RaFvFbx4AvnLzALAC6Sa3x", "question": "How would you describe this mans stance?", "choices": ["relaxed", "ready", "lazy", "unconcerned"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490973.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71859, "question_id": "RdRWKdRJxxJE8VpMEPVa8x", "question": "Which teddy bear would be the hardest to keep clean?", "choices": ["bottom 2", "bottom 1", "bottom 3", "top 1"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071859.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192396, "question_id": "Rdiq4A62Qo65PAv9mnG4dE", "question": "What body part is this man using to maintain balance most effectively?", "choices": ["head", "rear", "arms", "legs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454228, "question_id": "ReivGzd8f2E4ZEEdyTJ8KR", "question": "What Disney character would feel at home among these animals?", "choices": ["sebastian", "simba", "dumbo", "jiminy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114655, "question_id": "RfPSeRcGyvjPZVbGezyCa7", "question": "What is the most likely size of the little girl's shoes?", "choices": ["12", "ten", "seven", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114655.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 106578, "question_id": "RgDaaAoUonwEwb93i2Mfpi", "question": "What is the person who parked here most likely to get?", "choices": ["imprisonment", "cash prize", "money", "parking ticket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000106578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545105, "question_id": "RhB4MnYELYLbsg7e3kFTLP", "question": "What can be found in the room?", "choices": ["ham sandwich", "playing cards", "pool table", "faucet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545105.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190479, "question_id": "RhJUc4si7vCWStCKEKSyUM", "question": "What natural force caused the material on the ground to form?", "choices": ["evaporation", "earthuqake", "tsunami", "weathering"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190233, "question_id": "Ri9weumDaT3kZDNxSDesam", "question": "What baseball player is the man likely a fan of?", "choices": ["aaron judge", "john means", "alex verdugo", "chris sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190233.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342923, "question_id": "RiCGyc2axQDqy2HJGhN5fF", "question": "What color item is used to identify these animals?", "choices": ["brown", "black", "green", "uyellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342923.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172605, "question_id": "RiWybU68625k7aUWEaMHNK", "question": "What activity is the red item usually used for?", "choices": ["swimming", "fishing", "hunting", "camping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172605.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367968, "question_id": "RmKpJHGENgPLmQzJpMxQfq", "question": "What usually happens in this room?", "choices": ["emailing", "showering", "watching tv", "pool playing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000367968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364863, "question_id": "RmZjh66PUMm9CfmxsiUm84", "question": "How many contiguous states are further north than the state on her shirt?", "choices": ["three", "one", "six", "zero"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000364863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410727, "question_id": "Rno4G4vkjA5AdzxAC6DMvd", "question": "Why is there a picture of a bear and a girl above the bears?", "choices": ["girl owns", "bears own", "history", "there before"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000410727.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337134, "question_id": "RnyEtzf9KaZQzLBtHHQgcw", "question": "What feature is this animal known for?", "choices": ["gills", "stinger", "trunk", "quills"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337134.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277682, "question_id": "RnzwkLDpdLfCBkcwN2J3FS", "question": "What does the yellow lane on the left allow for?", "choices": ["parking", "biking", "bus stops", "crossing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000277682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192526, "question_id": "RpS5kCBG3ciTwn94YWjP24", "question": "What is common about the following animals?", "choices": ["are mammals", "long nose", "are small", "they fly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192526.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 148369, "question_id": "RqwMuy2jHjhhkJQqutkCEm", "question": "Which person is in the greatest danger if pushed?", "choices": ["striped shirt", "white hair", "sleeveless guy", "glasses man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000148369.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345164, "question_id": "RrJNaBPzKPYpzQjNF8GVuJ", "question": "What might have prompted someone the make scratches on this meter?", "choices": ["emergency signal", "rain", "parking ticket", "sos"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345164.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 278105, "question_id": "RsiDkcN6Jgq8ZdgPdhrQiV", "question": "The posts here hold what up?", "choices": ["wire", "food", "signs", "giraffes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000278105.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516331, "question_id": "Rsy5v78yarJ7ieyDLeo9PT", "question": "What season is this definitely NOT?", "choices": ["autumn", "summer", "spring", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516331.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59052, "question_id": "RtV57Su6SMyk4xKKriwnBG", "question": "What type of cargo is this train carrying?", "choices": ["passengers", "coal", "chemicals", "grain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059052.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499329, "question_id": "Ru3tSCWaVnLPNejURiUNL5", "question": "The man is most likely from where?", "choices": ["gabon", "usa", "germany", "rwanda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41760, "question_id": "RuAWUHGhZYVRViExDNo3sx", "question": "Why is the little girl holding poles?", "choices": ["for fashion", "self defense", "as weapons", "for balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000041760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291766, "question_id": "RuVxoBxD7VZmtQLZXASN4M", "question": "What is the food on?", "choices": ["table", "ground", "plate", "napkin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540510, "question_id": "RuecZqG28hbEJgcGxiUr5i", "question": "What can be said about or used to describe the surfer's attire?", "choices": ["dress clothes", "wetsuit", "bikini", "non-existent"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540510.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169039, "question_id": "Rug7ppgWLCG4tpWEmCC5zB", "question": "What shape is the pattern on the tie?", "choices": ["square", "circle", "diamond", "rhombus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169039.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90013, "question_id": "RukRF47594WHWMzfGJoXSQ", "question": "On which street are you encouraged to Stop Trying?", "choices": ["phone tree", "elm", "main", "4th"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 116392, "question_id": "RutQMRGkGev4bLafnnst3z", "question": "What is the material of the item that shows the horse made of?", "choices": ["bamboo", "wood", "pic", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000116392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570306, "question_id": "RwWYXze2fCUhGsJwpS3q3y", "question": "The animals seen here are identified by looking at what colored item?", "choices": ["black", "red", "light blue", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170915, "question_id": "RxhvctiVVPWysTNA5rYKik", "question": "The horses on the field are grazing during which season?", "choices": ["spring", "fall", "summer", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170915.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339091, "question_id": "Rxkg5xyaeRR3SqAPShQTRm", "question": "What activity is the zebra performing here?", "choices": ["grazing", "drinking", "sniffing", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339091.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539703, "question_id": "RyCJranXW6GNiaUwiL9YQr", "question": "Who is the sign on the pole supposed to educate?", "choices": ["giraffes", "guests", "zoo keepers", "investors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539703.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451457, "question_id": "RzALDGAoBgGg4ffrJWKV6C", "question": "What might a child build on this surface?", "choices": ["fort", "sandcastle", "snowman", "mudpie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451457.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110427, "question_id": "RzD43iFbUfBCEgtwJRENnZ", "question": "What is the number for?", "choices": ["identification", "safety", "nothing", "style"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252466, "question_id": "RzjMC7SPbYPjJv9q2wXjND", "question": "What is the Italian word for this animal?", "choices": ["nesti", "orso", "ours", "medve"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172235, "question_id": "S2B32y8ey3oSNSr77nZUh5", "question": "The animal here must do what to see their babies?", "choices": ["swim", "sing", "hibernate", "incubate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474783, "question_id": "S2TVE4RF9YVmWkvRAWahnD", "question": "The container shown here allows things inside it to be cooked in which manner?", "choices": ["char broiled", "fried", "steamed", "grilled"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550335, "question_id": "S2mgdvrDmbP5HFKPNP2nBc", "question": "What are these elephants ready to do?", "choices": ["sleep", "play", "eat", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550335.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399396, "question_id": "S2mufAi7SaPhZPkPv5N29U", "question": "How is the pizza portioned for serving?", "choices": ["diced", "sliced", "scooped", "shredded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506435, "question_id": "S2rnetUL4w4nqw6pCvR5CD", "question": "These fruits are in what family?", "choices": ["melon", "stone", "berry", "citrus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214890, "question_id": "S3RoVHB5shQRnt8j2mNxxy", "question": "What is this object meant to provide?", "choices": ["time", "weather", "direction", "speed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214890.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417183, "question_id": "S5Aqdarwcj9ng7qgDrgGbX", "question": "What will one see if the camera is moved up a tiny bit?", "choices": ["waist", "head", "foot", "knee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417183.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197761, "question_id": "S5T9LTJThRkxsc5xFUPLTd", "question": "What macabre scene is acted out here with a stuffed animal?", "choices": ["dancing", "funeral", "hanging", "seance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197761.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163323, "question_id": "S6ncrHD8vgsm73pDuKHK6p", "question": "What is larger than the office the gas station employee works in?", "choices": ["motor home", "car", "pump", "soda machine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163323.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 67940, "question_id": "S83QLyPC7CcR6nPv3BtBUv", "question": "What temperature would these muffins be baked at?", "choices": ["350 f", "500 f", "150 f", "250 f"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000067940.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51996, "question_id": "S8hLVSGAvUSzWUfPChG8aS", "question": "What does this cake look like it's celebrating?", "choices": ["christmas", "autumn", "july 4th", "easter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170018, "question_id": "SAo8HzS3CiKA4XUNUiz5Q5", "question": "Approximately how much of the food item has been served or missing?", "choices": ["one-third", "one-fifth", "half", "one-forth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170018.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579058, "question_id": "SBadXa5NWHadBNpVNhQZmA", "question": "What can be used to describe the giraffe that is closest to the photographer?", "choices": ["old", "blind", "young", "injured"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579058.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396151, "question_id": "SBmbKGxqJ8R5epJkZn67aJ", "question": "What are the chickens doing?", "choices": ["resting", "singing", "having meeting", "seeking food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 492490, "question_id": "SCrYbWtijU9xZWV6qgWgLK", "question": "What are the type of diapers the baby is wearing called?", "choices": ["paper", "disposable", "reusable", "organic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000492490.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22992, "question_id": "SDPVBrWAnTaSgLVNKJFJZh", "question": "Where does this vehicle land?", "choices": ["dock", "parking lot", "runway", "station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022992.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378839, "question_id": "SDvbBnew4uz2CVfMdknZ2i", "question": "What sort of life saver might be likely to possess the tool to get water from this device?", "choices": ["nurse", "doctor", "sherriff", "fireman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378839.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379951, "question_id": "SE9qzd6sPndQTA9N4zCnD9", "question": "What body part is this person using to communicate her ideas here?", "choices": ["index finger", "thumbs", "knee", "mouth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379951.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 335300, "question_id": "SEXGwhGh6RP4GenJLbiCYf", "question": "What merchandising firm sponsors this match?", "choices": ["lumber lounge", "herald", "custom-pak", "walmart"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000335300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97937, "question_id": "SFPnekYMkNoLHMQTMXgycr", "question": "The mixer brand is under the umbrella of which larger company?", "choices": ["maytag", "whirlpool", "smeg", "kenwood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191671, "question_id": "SGKDfVpLh6g6epnxr6Ft79", "question": "What type of transportation is shown?", "choices": ["air", "rail", "road", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191671.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265758, "question_id": "SGcLSaZm9u2QUTDq3ecaP5", "question": "What is the dog doing?", "choices": ["herding cattle", "following cows", "feeding", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265758.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481475, "question_id": "SJUwtrezpcVnDBnbfsuJNW", "question": "XL airways belongs to which country?", "choices": ["italy", "uk", "us", "france"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243943, "question_id": "SJeXMkpubqGbMtuhkchcab", "question": "What does the black remote near the dog control?", "choices": ["television", "xbox", "vcr", "playstation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476076, "question_id": "SM9UxhCERWmJYPh4U57BGo", "question": "What information does this object provide?", "choices": ["time", "date", "direction", "temperature"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7462, "question_id": "SMWKk9CFmpewNcoNCEsvoA", "question": "The ducks are floating on the water under what type of weather?", "choices": ["rainy", "foggy", "cloudy", "sunny"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007462.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90944, "question_id": "SMdVFAkuNyuiaFwdH9cksm", "question": "How do the price of these buildings in this location probably compare to those further inland?", "choices": ["cheaper", "can't tell", "more expensive", "same price"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090944.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314314, "question_id": "SMxUv55RYpVpDafRaXaWtZ", "question": "Where is this kitchen located?", "choices": ["home", "hospital", "school", "store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314314.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177351, "question_id": "SPMdRFaEAVjuad95bcrjuY", "question": "The name of the Pillsbury doll is called what?", "choices": ["dough girl", "none", "doughboy", "dough man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177351.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186023, "question_id": "SQTJJjStJnZsCkkmB76g4C", "question": "Why is the man wearing the colorful uniform?", "choices": ["to impress", "as costume", "to compete", "for halloween"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77250, "question_id": "ST6hxkqDqVV5e9mW4ybZ2N", "question": "This sign was damaged naturally by what?", "choices": ["sun", "birds", "rats", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077250.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416566, "question_id": "SURXuUmAPktz8w8eFaKJtQ", "question": "Why is the remote here?", "choices": ["holding it", "posed", "storing it", "for sale"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000416566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 497111, "question_id": "SVEn6KHzsSkLkXheFYELnT", "question": "How accurate would the average driver determine this set of signs to be in indicating directions or prohibitions here?", "choices": ["80%", "100%", "none", "90%"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000497111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 237553, "question_id": "SVudg7muCvohm4no5DNUAX", "question": "The large bathroom window most likely has what attached to it?", "choices": ["tint", "anti-fog spray", "stain glass", "clear tape"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000237553.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521329, "question_id": "SWUJ4xsNGvAxFVbqFzJS4z", "question": "What can the animal see in the glass?", "choices": ["nothing", "cats", "their reflection", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20872, "question_id": "SWxG45EyYQNCCRgZExF9ct", "question": "What kind of animal is shown?", "choices": ["reptile", "wild", "domestic", "aquatic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020872.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 340414, "question_id": "SXNyyrGjC2hbdtCYHonKvd", "question": "An emergency vehicle for travel between a space station and the earth or for the recovery?", "choices": ["paraglide", "para wing", "none", "parasail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000340414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565032, "question_id": "SXZpFLziFTv6oFpV2XkGH4", "question": "What would most likely cause the elephant to raise his trunk to the barrel?", "choices": ["hunger", "fear", "curiosity", "boredom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565032.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251594, "question_id": "SZCoU4d2iSAiMCUJkpgmQk", "question": "What vehicle is usually found in this environment?", "choices": ["car", "boat", "zeppelin", "plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251594.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444284, "question_id": "SZXvWzEGDZMX3jQkFAtK84", "question": "Where are the giraffes in the image?", "choices": ["savannah", "zoo", "building", "backyard"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000444284.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290351, "question_id": "SbZW5SYEnQ8z6Qp63EYr8L", "question": "What instrument can be seen on the floor?", "choices": ["pencil", "pen", "crayon", "marker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290351.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428545, "question_id": "Sbhfwue49F9M7XdNSXKpRm", "question": "In what continent is the headquarter of this airline located?", "choices": ["europe", "asia", "north america", "africa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428545.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409061, "question_id": "SbzR3MmqMAqWVr2mXqUK6P", "question": "What could both of them use right now?", "choices": ["napkin", "shower", "car", "spanking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409061.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 509865, "question_id": "Sde6yDZqEYWHvWdZmNDMDv", "question": "The structure seen behind the tree tops is likely what?", "choices": ["roller coaster", "sign", "train trestle", "house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000509865.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373877, "question_id": "SdhMJGGvQxDsGuLng6ksBp", "question": "What does this man hang from?", "choices": ["helicopter", "nothing", "plane", "para sail"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000373877.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476289, "question_id": "Se2RxuRw7w48vyJBGvqiwM", "question": "How many donuts were likely in this box when it was purchased?", "choices": ["seven", "nine", "six", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124396, "question_id": "Se6aVPUp3bbwLjWv94eaXt", "question": "What vegetable shown here is greenest?", "choices": ["potatoes", "carrots", "cabbage", "beef"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514394, "question_id": "SeQ3jB36g4pqitqxyb4iCd", "question": "What is this person studying?", "choices": ["botany", "french", "computer science", "guitar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112598, "question_id": "Sg2TVJJ6itnaX2dfwNbHUu", "question": "What can the jet ski do for the surfer?", "choices": ["toe him", "make waves", "feed him", "pay him"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112598.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 469057, "question_id": "SgPmF4ndN7uubGfn3DQ2xd", "question": "What type trees are greenest here?", "choices": ["conifers", "deciduous", "paw paw", "palm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000469057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10372, "question_id": "SgjXJmDyBiJxneKvuQuB7C", "question": "Which of these animals is more likely to eat the other?", "choices": ["neither", "mouse", "bird", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010372.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126386, "question_id": "SgvewBz8GukRkEUQ5PZK6a", "question": "What might grow inside the item behind the apple?", "choices": ["fish", "nuts", "chick", "mushrooms"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000126386.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112046, "question_id": "Sh2f3JH96wqnykRkwFbQKp", "question": "What material was used to make the headboard?", "choices": ["aluminum", "wrought iron", "wood", "marble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351292, "question_id": "ShVKDL2D3WnnKW4EJ9cptu", "question": "The smaller toilet here is designed for whom?", "choices": ["daddies", "moms", "no one", "children"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351292.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440898, "question_id": "ShaNAJEKGezcotaUpE9yMS", "question": "What kind of biome are the giraffes on?", "choices": ["temperate", "jungle", "savanna", "taiga"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440898.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89371, "question_id": "Shnam28PwDEAdk7T3mQqTy", "question": "What does it look like these animals are doing that they are physiologically incapable of doing?", "choices": ["laying eggs", "flying", "milking", "eating"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000089371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51190, "question_id": "ShqDPDxtymaEq7gUAym44W", "question": "This hotel is located in what zip code?", "choices": ["96126", "90210", "91749", "94389"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051190.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52801, "question_id": "Sio8SjRsZDL4HSXq3roAX9", "question": "What can be said about the two people in front of the photographer?", "choices": ["attacking", "throwing", "swimming", "posing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052801.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358244, "question_id": "Siz3z57BWmJ3nyjv7X9k2T", "question": "What material are the two windows on the right made of?", "choices": ["metal", "carbon fiber", "glass", "plastic sheets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358244.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53997, "question_id": "SkSKuAHj6egS3GDGBB2hbv", "question": "The first word on the spoon originally referred to what kind of animal?", "choices": ["snake", "dog", "cat", "cow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053997.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260506, "question_id": "Skz2dekC3Zefxod2U2T7B4", "question": "Where is this store likely located?", "choices": ["city", "beach", "forest", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260506.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294522, "question_id": "SnJVxKythF8gSFKaGKtdLB", "question": "What is on the stone item next to the hydrant?", "choices": ["graffiti", "grapes", "cow", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294522.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144461, "question_id": "SnNvwk3LjTxJ7KZYQyZMqS", "question": "What are the zebra utilizing at this moment?", "choices": ["their gonads", "their teeth", "their tails", "their eyes"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000144461.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129564, "question_id": "SnULG5HLL89zDxuyFatuSa", "question": "What are the white strips in the tooth paste most likely for?", "choices": ["whitening teeth", "taste", "texture", "reflection"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000129564.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118054, "question_id": "SnVTE6825LVhzvNWNgTFnf", "question": "Why is he popping his head through the fencing?", "choices": ["is thirsty", "is hungry", "is lonely", "is angry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120148, "question_id": "SoaFKdY2pYHHCmjNsGuvYT", "question": "This animal likes a food product that comes from what other animal?", "choices": ["bee", "shark", "squid", "goat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120148.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27580, "question_id": "SptzJWzM8SMpdTdnUe5Tbn", "question": "What ingredient contains the most fat?", "choices": ["chocolate powder", "cream", "nut", "flour"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000027580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51664, "question_id": "Sq35pciPvwf9uCKPLDuYVu", "question": "Which one of these would make it safe to transport these animals to this location?", "choices": ["handcuffs", "tranquilizer", "whip", "treats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451723, "question_id": "SqFuscNjYxwQ4tT884acoR", "question": "What is the child trying to do?", "choices": ["hide", "rest", "play", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178851, "question_id": "SqMRgtLqP7NvdYLZNb9qjL", "question": "What might someone who is parking here need to do in about 45 minutes?", "choices": ["have icecream", "feed meter", "flee scene", "nothing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000178851.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97438, "question_id": "SqrD3Pi9RTtVLhWbiBmpJf", "question": "What is the typical shape of the type of food used for this knife?", "choices": ["triangular", "oval", "round", "spherical"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097438.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56757, "question_id": "SrYaWhkWkfhEX75pLuKpDC", "question": "The bench is situated in which type of area?", "choices": ["playground", "field", "public park", "beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61644, "question_id": "SrmH7WTzFTWfboobgbG7Jg", "question": "What material is the piping that connects to the radiator valve to the right of the cat?", "choices": ["aluminum", "copper", "brass", "steel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061644.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 305965, "question_id": "SsMVF8tSGUfvW5eb7jU2AY", "question": "What kind of a store is the one with the sign in the forefront of the foreground?", "choices": ["deli", "record", "bulk", "supermarket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000305965.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309769, "question_id": "Ssx8Zyr6hZ29U8ubV6e6zk", "question": "What kind of allergy would keep someone from eating the sandwich?", "choices": ["honey", "shellfish", "egg", "peanut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38207, "question_id": "StEU9no3qT3XLmQUso4wLT", "question": "Why is the board skewed downward?", "choices": ["too wet", "is falling", "lost", "catch wave"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000038207.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170189, "question_id": "StNBwbYPwx6cgEds9rgme4", "question": "The person is holding what?", "choices": ["ski poles", "sandwich", "baby", "leash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170189.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298028, "question_id": "SuxEJnBT4wrjqjG3KLxNfL", "question": "This type of eyewear is often used during what activity?", "choices": ["sailing", "volleyball", "swimming", "bowling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298028.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417770, "question_id": "SvEJxvBJaTYXzzbckBBJDs", "question": "What is the number at the top of the bus?", "choices": ["505", "876", "112", "932"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417770.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68296, "question_id": "SvS5LJCTjNcrvSg2GGfU4B", "question": "What is he doing?", "choices": ["resting", "posing", "threatening cameraman", "breaking camera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068296.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372598, "question_id": "SxknLqagwe9ihaBC6NjpGm", "question": "Which country is this building located in?", "choices": ["germany", "united states", "united kingdom", "japan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372598.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40841, "question_id": "SxkpkYo578U9CUNzo8i5rq", "question": "What is unusual about the man shown here?", "choices": ["nothing", "eyewear", "size", "color"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040841.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127380, "question_id": "SxoCiwnawtJDPKNVHr64qs", "question": "What part of the skateboard caused the damage to the skateboarders shoe?", "choices": ["wheels", "trucks", "grip tape", "bearings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127380.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175661, "question_id": "Syn4EPZ6nXXxff6vsj2dg7", "question": "What will he do next?", "choices": ["cook", "take bite", "throw away", "put down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175661.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224964, "question_id": "SyxQt4ACHX9GszQ9JxPZnT", "question": "Why is the tarp over the car?", "choices": ["camouflage", "bug prevention", "anti-theft", "weather protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224964.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 255918, "question_id": "SzrHwXPA3QhuKV9iej5gwB", "question": "What kind of movement are the zebras engaged in?", "choices": ["sidestep", "gallop", "strut", "walk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000255918.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454815, "question_id": "T2JexZ4wQh3x2ewQgYu5Su", "question": "What is near the shutoff fountain?", "choices": ["clock tower", "streetlight", "grass", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454815.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84883, "question_id": "T2hAuuaVc7ZiWTuCtfpJre", "question": "The height of these types of animals in measured in what units?", "choices": ["cubits", "feet", "hands", "meters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084883.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156161, "question_id": "T3wyk9vUVquTQ7D7sWzWxS", "question": "What is the man wearing?", "choices": ["tuxedo", "bikini", "wetsuit", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444597, "question_id": "T4GniVc5pNSCijE5MCUJck", "question": "What animal is a close relative of this animal?", "choices": ["elephant", "hyena", "wolf", "lion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444597.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470222, "question_id": "T4daWKgMWufFdBRHqkWtvs", "question": "How are these kids related to each other?", "choices": ["teammates", "siblings", "classmates", "coworkers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470222.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527880, "question_id": "T6RiHMwd9rbrbPGjjrQiWW", "question": "What mode of transport do you see on the picture?", "choices": ["railway", "water", "road", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578321, "question_id": "T6ofBkCKXFqs8aFoytturP", "question": "What is this little girl's hairstyle called?", "choices": ["ponytail", "curls", "bun", "pigtail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578321.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98657, "question_id": "T8cRUVje6NVxoqSKarMq4z", "question": "If one of these boxes was full how many total calories would the food be inside of it?", "choices": ["2975", "5000", "250", "472"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000098657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525523, "question_id": "T8fV4HLL43f4zcRBmsMD3b", "question": "What does the animal have at the end of its legs?", "choices": ["quills", "hooves", "flippers", "boots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525523.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513209, "question_id": "T9JswiLiRScCTSocGuhcup", "question": "The stickers here were placed by whom?", "choices": ["no one", "professional painter", "small girl", "manufacturer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508138, "question_id": "T9dTw6JDPiLNJt9ANZdki9", "question": "What is required for this activity?", "choices": ["boat", "bike", "car", "board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418941, "question_id": "T9kmbEVSAJhEAuxc3odyqk", "question": "What does a person have to pay to get on the large item here?", "choices": ["tithe", "toll", "tax", "fare"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418941.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100033, "question_id": "TAB9tTtUWyTc4WjfRKZZdE", "question": "Where is this game being played?", "choices": ["court", "field", "mud", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100033.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326677, "question_id": "TAxA3co4hKcri3FD9UVBqW", "question": "What is the water instance above called?", "choices": ["water", "tsunami", "ocean tide", "wave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326677.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431839, "question_id": "TD8SmWDAc3c2h2QN6qhQsv", "question": "What is a danger facing the red and yellow thing?", "choices": ["snakes", "electricity pylons", "dogs", "speeding cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431839.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161845, "question_id": "TDWrRAffVoewEpjP5MLuRX", "question": "Where are these skis located?", "choices": ["shed", "store", "slope", "porch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161845.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434348, "question_id": "TFAvNCXx2NAEfEDppaxoRQ", "question": "What type of ground is beneath the bench?", "choices": ["dirt", "grass", "clay", "bark"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371751, "question_id": "TFG8rhEBvnqnDYxgykvJDJ", "question": "What is this animal doing?", "choices": ["showing off", "sleeping", "finding food", "attracting mates"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371751.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507020, "question_id": "TFa6wHPB7Z3yJBcX2UbPSp", "question": "The item in the tree is made of what material?", "choices": ["plastic", "wood", "paper", "tar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507020.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543670, "question_id": "THJUqMKUiyG6dsabRKpC9L", "question": "What is the state of the sink?", "choices": ["pristine", "rusted", "new", "remodeled"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000543670.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546764, "question_id": "THUwZQZ4Gd7RYTs32YnL5y", "question": "What part of the animal on the left is closest to the ground?", "choices": ["ear", "neck", "nose", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546764.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75872, "question_id": "THiqjUSjqjueyUdKq6vs8Z", "question": "What is the most likely direction that this player will go?", "choices": ["backward", "forward", "right", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075872.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315132, "question_id": "TJNBiTGbMe47pkEbgtnDiC", "question": "This device is used to play which game?", "choices": ["cartoon", "remote game", "video game", "voice controlling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315132.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303832, "question_id": "TLG7LRxyFf87Jz3Ld6Vajs", "question": "What type of range is seen?", "choices": ["shooting", "kitchen", "mountain", "gun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303832.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188856, "question_id": "TLkjBuhyvyaJ9vqihq8Z3z", "question": "What is the vented item mounted on the wall used for?", "choices": ["drying", "ac", "heating", "dehumidifier"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188856.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447763, "question_id": "TLuEpx6wt9gCbYykdgxiE3", "question": "What is the man pointing the controller at?", "choices": ["waiter", "dog", "light", "television"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447763.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381768, "question_id": "TLuR8daH7SiKDxhnDQxL7L", "question": "What object would most people put their put in?", "choices": ["table", "shoe", "cat", "sofa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245059, "question_id": "TMd7mQy2c8RQj8YpboXipD", "question": "Why is the cord connected?", "choices": ["style", "water", "food", "power"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000245059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321076, "question_id": "TNNLCLXcxYbTXkuLsGbE7R", "question": "What type of electronic device is this 3D scene being presented from?", "choices": ["laptop", "tablet", "desktop computer", "smartphone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51607, "question_id": "TNeS8jzHPpgv7tiMnMdEPV", "question": "Which author inspired the red shirted creature depicted here?", "choices": ["milne", "potter", "dickens", "suess"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051607.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173542, "question_id": "TNngNEezrerUzNSzvvfTZL", "question": "Pressing which button will get the viewer seated here to a higher channel?", "choices": ["nine", "plus", "minus", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575609, "question_id": "TQPrWG47jMhPLjqujWp9Pa", "question": "What is the breed of this dog?", "choices": ["poodles", "bull dog", "german shepherd", "labrador"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575609.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160827, "question_id": "TR99inb5gfcUdRJoqDjtft", "question": "Where does this airline rank in terms of age among airlines of the world?", "choices": ["third", "oldest", "youngest", "second"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000160827.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325465, "question_id": "TSieJ8uKRERjPf5zPLtqY8", "question": "What is the picture on the mouse pad?", "choices": ["beach scene", "family photo", "periodic table", "calendar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000325465.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51564, "question_id": "TT7t7oRnpCpsHJPjttqp9b", "question": "What keeps this animal from running away here?", "choices": ["leash", "apps", "chair", "drinks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051564.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98584, "question_id": "TTXTqBaKAkjh6XC8Ukw7wy", "question": "The animal is in what setting?", "choices": ["lake", "field", "tundra", "desert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098584.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 14040, "question_id": "TVNXQVNLdMwTRZBkC7eU9R", "question": "Where is the surfing capital of the world?", "choices": ["argentina", "paraguay", "chile", "bolivia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000014040.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183998, "question_id": "TVmcH7NvehJz2bY4Rmnv8R", "question": "What is required for this activity?", "choices": ["wind", "rain", "snow", "ice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558315, "question_id": "TXAU9CYPPnX4CrisWewP8N", "question": "What is seen in the sky?", "choices": ["helicopter", "airplane", "rainbow", "kite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296237, "question_id": "TYLQ7PUYzSS3fS9Qakcrq8", "question": "What profession uses the item that the person has on their eyes?", "choices": ["accountant", "chemist", "chef", "lawyer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98921, "question_id": "TZdBVsKTwoTkPFnKti9UNu", "question": "What plant parts is this animal eating here?", "choices": ["blades", "leaves", "grains", "roots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098921.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458845, "question_id": "TZsSnBdZGbb6W6gbZDNiLE", "question": "The pan full of food is about enough to feed a family of what?", "choices": ["twenty", "four", "eight", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458845.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555505, "question_id": "Tam9aVcG7yeG7NoTK2piy6", "question": "Where is this sport played?", "choices": ["field", "mud", "court", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7266, "question_id": "TaushSgbRGZYRtKheq3RcW", "question": "The mailing service advertised uses what color trucks?", "choices": ["red", "brown", "white", "purple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007266.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193375, "question_id": "TdfnKYfocuWTJ6MnDzdTVk", "question": "Why is there paper on the toilet seat?", "choices": ["prevent contamination", "hiding it", "cleaning it", "saving it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127333, "question_id": "TekKWaK9VnYptHyoyhEvQi", "question": "Why is there so much wood around the doorways?", "choices": ["decorative", "experimentation", "hiding defects", "free wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127333.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92973, "question_id": "TewYNHxqbR4KxFjVW6fvVk", "question": "What is causing the brightness on top of the grass?", "choices": ["sun", "lamppost", "flood light", "flashlight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092973.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293021, "question_id": "TfoiYLt9n9CQ6unUWcixEp", "question": "When this truck is being unloaded what part of the back might likely be raised?", "choices": ["left", "front", "right", "back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293021.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176844, "question_id": "TfrQhhgEXtrpMLAi8gkb8t", "question": "What will prevent people from falling while working on the clocks?", "choices": ["roof", "pillars", "railings", "walkway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176844.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321175, "question_id": "TgYgvpbPqTrLDkLxjfFBGF", "question": "What is significant about the second elephant from the right?", "choices": ["hyperactive", "it's old", "strange color", "it's young"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321175.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115621, "question_id": "Th4GSzUB4GUxsyX6LneYEU", "question": "What is flying beside the man?", "choices": ["bird", "shark", "dolphin", "surf board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115621.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35380, "question_id": "ThV3sXg5tPV799iSWWjwJi", "question": "How was the food cooked?", "choices": ["boiled", "grilled", "smoked", "baked"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035380.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 174968, "question_id": "TiGG4Ksh8ynmBnVaoCZbdb", "question": "Which cheese is being used as a topping on this food?", "choices": ["mozzarella", "cheddar", "provolone", "brie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000174968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96518, "question_id": "TjT6bKfENTLwU9SyLxggKV", "question": "Why are the large rocks around the sign?", "choices": ["waterproofing", "anti-theft", "stability", "visibility"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096518.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545576, "question_id": "TjmCVPJH8K8G5b597sgutZ", "question": "What is the main difference between the smaller giraffe and the larger giraffe?", "choices": ["lighter hair", "darker color", "shorter tail", "shorter hair"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000545576.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99042, "question_id": "TmYhWHBY3vvvu5U92Bdnww", "question": "The color of the outer ring of the frisbee is named after what?", "choices": ["fish", "flower", "tree", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099042.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254526, "question_id": "TnMAQ3LrS2hqWzDJRnzJqs", "question": "What are the glasses on the mans head helpful for?", "choices": ["seeing", "feeling", "movement", "hearing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254526.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121761, "question_id": "TnvEiPg7uimvWK32pUF4EV", "question": "What type of pasta is in the soup?", "choices": ["ziti", "penne", "farfalle", "rotini"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121761.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151030, "question_id": "Tp8SN8R3LZt5bERHhrqVbS", "question": "This outfit is most likely to be seen where?", "choices": ["circus ring", "office meeting", "beach", "punk concert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151030.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 141344, "question_id": "TpCTnX5d2MN3PyP3NvH9Vf", "question": "Which cat has better positioning if a fight ensued?", "choices": ["orange cat", "black cat", "white cat", "gray cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000141344.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540631, "question_id": "TpKhxJw4tgkAwjpyhep9jy", "question": "What would be an unusual topping for this item that is on the plate?", "choices": ["bacon", "gummy worms", "meatball", "chicken"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000540631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26364, "question_id": "TqtD33xjb8kJ5nTQ3WfKFk", "question": "What is the yellow part of the animal called?", "choices": ["beak", "legs", "eye", "spine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420657, "question_id": "TrDNN9T3bsh99Q6XCUw2Yq", "question": "What colour are the handles by the door?", "choices": ["green", "red", "yellow", "blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558370, "question_id": "TrFMbUbQ2CTZkuc5RX5XTx", "question": "Where is this surfer headed?", "choices": ["no where", "left", "shoreward", "seaward"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000558370.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147479, "question_id": "TrsnxjTtFBvu7Tp4hXGJB2", "question": "What is available to clean your hands?", "choices": ["liquid soap", "bar soap", "water alone", "wipes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581003, "question_id": "Ts2ykSGJ7EaXBJoTmNZy2a", "question": "Who owns this bear?", "choices": ["park", "no one", "photographer", "ranger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581003.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269286, "question_id": "TtZMphtKpT3qJ4NXpptRua", "question": "What type room is shown here?", "choices": ["grannys", "childs", "dads", "moms"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561388, "question_id": "TuG9CDamo5TnrDocU4Fygx", "question": "What animal is normally moved in this trailer?", "choices": ["horse", "ostrich", "cats", "rhea"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353738, "question_id": "TuK5og8SQ4swMZ8Z9FREkC", "question": "What is the man in black trying to do on the waves?", "choices": ["waterski", "fish", "surf", "swim"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353738.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517319, "question_id": "TuPWMKp3QACeiTx3PfD34u", "question": "Which brush is older?", "choices": ["both", "left", "neither", "right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517319.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288296, "question_id": "TvwMe4xPtLKWQVYQecRneX", "question": "What is the first name of the artist that is referenced here?", "choices": ["stanley", "jay", "robert", "mike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288296.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24142, "question_id": "TwdmRf4syoqFT5zxsU9VjW", "question": "What type of water is being surfed on?", "choices": ["pool", "salt", "bathtub", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024142.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107398, "question_id": "Tx5t4NK9pfki87j5oMEv5q", "question": "What is the man pretending to be on?", "choices": ["surfboard", "roller skates", "plane", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107398.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8424, "question_id": "TxCoZcH4Jp2hSSBfbhNPgt", "question": "He was probably transported from the hospital using what?", "choices": ["car seat", "sling", "stork", "crate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008424.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286989, "question_id": "TxL2oxWiNj5erm6vRZZWk5", "question": "What might children build in this situation?", "choices": ["sandcastle", "snowman", "playpen", "mudpie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286989.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133322, "question_id": "Ty8KbvRe4qiuoqBJsAd7x5", "question": "What should be removed from this restroom?", "choices": ["sink", "water", "trash", "list"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133322.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192315, "question_id": "Tyg9sakmmxrrxkfEUtiUN8", "question": "What activity is the giraffe engaged in among the tall grass here?", "choices": ["drinking", "looking", "eating", "defecating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524740, "question_id": "U2pUMn7mXsGusx5nJgxKZE", "question": "What do the rods allow for?", "choices": ["baking", "drainage", "cooling", "heating"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000524740.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556427, "question_id": "U2sLn5rXWVQFgyqaeT2MZs", "question": "What is the airplane missing that most planes have for safety?", "choices": ["brakes", "rutter", "windshield", "pontoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394093, "question_id": "U2zbVYjT8LX6ieTxZvPVUX", "question": "What information is shown on this building?", "choices": ["time", "speed", "location", "temperature"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000394093.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570074, "question_id": "U38AhEi7KLhE9Hm8rVLgCK", "question": "Who would be reading these books?", "choices": ["women", "children", "adults", "men"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570074.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573459, "question_id": "U3b3xAxL3Fa3qndfgbjpqT", "question": "What kind of area is the bike traveling on?", "choices": ["highway", "street", "dirt road", "alley"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000573459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298996, "question_id": "U4KxgFf4tjrggmLmWqfHK8", "question": "Why is there a pattern on this curtain?", "choices": ["decoration", "warning", "safety", "stain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515138, "question_id": "U4RcXpKPvZ3ZGv5CciLtdQ", "question": "WHat is on the sign besides the words?", "choices": ["stickers", "wool", "wanted posters", "bugs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465912, "question_id": "U5dQLQ3o5WwvPttNp76BPD", "question": "Where is the glow coming from?", "choices": ["toilet", "waste basket", "aliens", "cat's eyes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465912.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400289, "question_id": "U5qNJq9XFAYjaXknRX2ei3", "question": "Which of these objects is furthest away from the cat?", "choices": ["floor", "tail", "iron", "shoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389891, "question_id": "U6G87jE7NTurLw5AfYbaiz", "question": "Where can you find this bathroom?", "choices": ["hotel", "home", "cinema", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389891.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301354, "question_id": "U6NpHknAZ9nH7Zc4WAVmyA", "question": "What is about to happen to this man?", "choices": ["beat", "trip", "wipeout", "pushed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533609, "question_id": "U6tATdNcuxpLBgQZtfUT7R", "question": "What type of business is the vehicle in?", "choices": ["taxi", "food truck", "transport", "delivery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533609.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581260, "question_id": "U77QoJFkMV4GggQqAYuxGy", "question": "What is needed for this activity?", "choices": ["sun", "waves", "rain", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581260.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151191, "question_id": "U7Jdd2dGJ8pbc22XwZunFQ", "question": "How many people can sleep in this room?", "choices": ["six", "three", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151191.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26186, "question_id": "U8DNmePwZDmrXp4o4pSYdg", "question": "What unusual item is shown in the most peculiar site here?", "choices": ["faucet", "toilet flush", "sign", "toilet tank"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026186.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205301, "question_id": "U8jLX9bLvE4EKoTyK9J92e", "question": "Where are these cows located?", "choices": ["zoo", "barn", "cage", "pasture"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205301.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557428, "question_id": "U9KfVdjuL89afLhXdMiUC2", "question": "The pocket watch illustrated or shown here might most likely to have belonged to which character in Wizard of Oz?", "choices": ["dorothy", "scare crow", "tin man", "lion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557428.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40457, "question_id": "UBNkMnxjXo7WtCQz7BjVd7", "question": "How many of the man in the blue and red shirt's hands are holding something?", "choices": ["none", "two", "three", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040457.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522753, "question_id": "UBbsa84GDL8N6W86mkFrc3", "question": "What sport star has the same first name as the first word on the sign?", "choices": ["jim those", "steve avery", "john franco", "don mattingly"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000522753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145936, "question_id": "UCkUvncpx5JZiLq4ddhtxc", "question": "Which ingredient has the most carbs?", "choices": ["crust", "cheese", "pepperoni", "pizza sauce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145936.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276867, "question_id": "UCqt8VpDohLskJ7qNsN58d", "question": "This fruit is indigenous to what continent?", "choices": ["africa", "south america", "asia", "north america"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276867.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26923, "question_id": "UDUoatZmdoGkLkoxKoAHTm", "question": "Which type of aero planes are pictured above?", "choices": ["g-6", "warbirds", "helicopter", "boeing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000026923.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399521, "question_id": "UDq4yaShyptx2sWBreXoGE", "question": "What might be the woman's profession?", "choices": ["veterinarian", "police officer", "horse groomer", "horse rider"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415521, "question_id": "UExQbZqVdkXgEwRe4fPXsw", "question": "The boy looks like he is using the skateboard as what?", "choices": ["frisbee", "pogo stick", "spear", "discus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30367, "question_id": "UFRzJBSnDGcX47PDFrgaw6", "question": "Inside this animal you will find what?", "choices": ["stuffing", "guts", "bears", "blood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030367.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402773, "question_id": "UGHFC6KG3NRmsT68SXtjN5", "question": "Which breed are these bears part of?", "choices": ["grizzly", "koala", "black", "polar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402773.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50784, "question_id": "UHefmh7JxSMqVg5XdrUAV7", "question": "The restroom is likely designated for use by what demographic?", "choices": ["men", "women", "children", "families"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200917, "question_id": "UHuZdyvAbcBUTQkv4YGgCz", "question": "What surrounds the rocks?", "choices": ["lemmings", "water", "soldiers", "cows"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200917.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358835, "question_id": "UJ2SBjGW8kgLwcQQjjaqJS", "question": "What is this zebra trying to do?", "choices": ["drink", "eat", "run", "rest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557674, "question_id": "UJjNRDi9ceNq5gTjFoEvNx", "question": "What part of the animal is raiding the trash can first?", "choices": ["mouth", "paw", "nose", "ears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557674.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 12987, "question_id": "UJuX6xjud4e49gnz4Dxyrs", "question": "The symbol seen here is indicative of which religion?", "choices": ["islam", "wiccan", "judaism", "christianity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000012987.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290321, "question_id": "UJvoeZz7K2t428TKtXWrqS", "question": "What is the theme of the location?", "choices": ["water park", "pirates", "carnival", "knights"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290321.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44249, "question_id": "UKACYCQFnTN5BNPbifT2aH", "question": "Riding these animals is a strong tourist attraction in what country?", "choices": ["colombia", "thailand", "jamaica", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 530169, "question_id": "UL5W6HuaYihMZLb7qRArDG", "question": "What type of environment are the elephants living in?", "choices": ["zoo", "natural", "sanctuary", "refuge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000530169.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467036, "question_id": "UL7uTE4XrtzxKYNdps8hqo", "question": "What is this vehicle considered to be?", "choices": ["glider", "biplane", "jet", "prop plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260728, "question_id": "UM8E9yPwgPNxSBKzMq4d6X", "question": "What is the manner of preparation of the greens?", "choices": ["sliced", "julienned", "diced", "shredded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260728.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478790, "question_id": "UNUq54tnRQsikmX46us9Xi", "question": "What is open on the left side of the room?", "choices": ["cabinet", "jar", "door", "window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 580948, "question_id": "UNrYoayAoAYpHYxBXUT7vd", "question": "What word is missing from his shirt?", "choices": ["love", "hate", "often", "learn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000580948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234752, "question_id": "UPQpTtA3xDvpdAHcXjEKZy", "question": "What body part on these creatures is likely to most severely injure the other creature?", "choices": ["belly", "tusks", "ears", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000234752.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 531881, "question_id": "UQMpLnz8sgDNAzRJ6ynKz7", "question": "The pants the man has on would be good camouflage in what environment?", "choices": ["grassland", "igloo", "tundra", "skyline"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000531881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 282112, "question_id": "UQimkV45cBxbjp8XCQUMdY", "question": "What kind of exterior building material is most prominent here?", "choices": ["glass", "metal", "wood", "brick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000282112.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251, "question_id": "UQqpcp9UiiWUxhZVczWcXs", "question": "What is most likely surrounding this area?", "choices": ["tundra", "black hole", "desert", "beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000251.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275467, "question_id": "URG7Z5qrzvTRoxqq4cj7pm", "question": "The poster encourages whom to give bikes 3 feet?", "choices": ["motorists", "pedestrians", "convicts", "animals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275467.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519464, "question_id": "URZTVXqUHpdZ7voXf44Ztw", "question": "Which country was this picture taken in?", "choices": ["united states", "france", "new zealand", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267617, "question_id": "UTA5oCf8iX8JQJzX9z248G", "question": "What is she ready to do?", "choices": ["throw", "swing", "bat", "catch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267617.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54070, "question_id": "UTDjNXtp4wDfSLkS56jdxF", "question": "In which one of these countries would it be difficult to operate one of these hotels?", "choices": ["iceland", "sweden", "norway", "barbados"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344723, "question_id": "UTExWLdGZYFonmEJKs5C3A", "question": "What would one have to do to legally park here?", "choices": ["come early", "insert coin", "paint wall", "back park"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159565, "question_id": "UUENXEQYfbuzyRb4HZAm55", "question": "What is needed for this activity?", "choices": ["sand", "wind", "waves", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 469132, "question_id": "UUnTJ6HaEhvVzeSihdqCsR", "question": "Why is the bear inside?", "choices": ["broke in", "someone's pet", "was trapped", "it's mounted"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000469132.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 244113, "question_id": "UVCP6VfMhNWrSwLeK7nfo4", "question": "What is helping the bird float?", "choices": ["plywood", "raft", "boat", "surfboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000244113.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511459, "question_id": "UVD2w7h52hpkGf5cvptKrP", "question": "What might be hung from the silver bar across the top of the white item?", "choices": ["loofer", "rope", "sponge", "shower curtain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549446, "question_id": "UVu8PQCbqtmfKBLenVQX5Z", "question": "What type of area is this signage located in?", "choices": ["mall", "tundra", "city center", "park"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549446.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 489880, "question_id": "UWwJZBuzfxpLJVFFapwXXm", "question": "What type of range is shown here?", "choices": ["mountain", "gun", "gas", "math"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000489880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161119, "question_id": "UXYfhm6Yrpfwovz2Qr4mwZ", "question": "What is another name for the hills behind the shore?", "choices": ["reefs", "mountains", "bluffs", "dunes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161119.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521308, "question_id": "UXsjH6sH6zmvcDT5Quvyiv", "question": "What season is it presently?", "choices": ["fall", "winter", "summer", "spring"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147964, "question_id": "UYGKXsFEAMhdFC6FvvYAXc", "question": "Who is famous for playing this sport?", "choices": ["maria sharapova", "mike trout", "pele", "lebron james"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147964.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442709, "question_id": "UYL9daSdDVTXaz3rx6ikAn", "question": "What information is provided on the wall?", "choices": ["weather", "latitude", "longitude", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 28143, "question_id": "UYz7aNjp7oLZ3EU5XDqnKL", "question": "What would happen to the rider if they fell off the horse?", "choices": ["get wet", "get dry", "win prize", "get hungry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000028143.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10573, "question_id": "UZiJZi47A9ALDZieNVE8Mq", "question": "What will she have soon?", "choices": ["sister", "puppy", "baby", "husband"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415541, "question_id": "Ua7CTw9VzuWgKdxvw4s5rv", "question": "Which item was probably added after cooking?", "choices": ["white", "red", "green", "brown"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415541.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575689, "question_id": "UcnSsMALWnemz3i7LaPGt6", "question": "What type seat does the person taking this photo have?", "choices": ["stowaway", "aisle", "captains", "window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575689.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 219805, "question_id": "Ue2o9gdXPGppkW9Ps5Wyyo", "question": "What are these animals doing?", "choices": ["jumping", "traveling", "eating", "sitting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000219805.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 238283, "question_id": "UfBzPKEcegDDRLdtVjundm", "question": "Where could the delivery trucks be making deliveries to?", "choices": ["apartment", "school", "stadium", "duplex"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000238283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293156, "question_id": "UgFfW4QrVAJKqamneVkxNW", "question": "Why does this person have their head covered?", "choices": ["fashion", "uniform", "protection", "religion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293156.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35359, "question_id": "UgxNuViouhW644wYCUtLig", "question": "What is the base of the structure made out of?", "choices": ["metal", "brick", "wood", "tile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035359.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512229, "question_id": "UhKFYixKQkzGVrG38gWSum", "question": "What is placed in the water?", "choices": ["fish", "pencils", "stems", "crops"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512229.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157388, "question_id": "Ui38bvRUH7YATCd8wXDe9k", "question": "Who most likely took this photo?", "choices": ["computer programmer", "wildlife observer", "receptionist", "joe biden"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472767, "question_id": "Uk6urBApDjZhKSMZmyJwen", "question": "What belongs where the lamb's front legs are?", "choices": ["food", "tools", "water", "wool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346680, "question_id": "UkcuzuR9w5kXA7YZQvXquk", "question": "Why are there two cats?", "choices": ["mirror", "carpet", "reality", "owners"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346680.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224631, "question_id": "UkjK7XxonwCVXsGhsZRaC4", "question": "The animal shown here is a distinct member of what species?", "choices": ["black bear", "none", "pooh", "ursa terribilis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44731, "question_id": "UmQoPNM74ezymkFfECXfVy", "question": "Where is the item that the man is thinking about?", "choices": ["couch", "hand", "shoulder", "head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044731.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562707, "question_id": "UoaSUetu7siCmR6TVb9VtX", "question": "In what year was this airline established?", "choices": ["1969", "1955", "1975", "1982"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163190, "question_id": "UozyQEg4vgCL2xwjPjwpu6", "question": "Is the window of the car closed?", "choices": ["unsure", "yes", "probably no", "no"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163190.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172618, "question_id": "UpPgGyeKA8SAiWo5ic3Vye", "question": "Who likely holds the other end of the kite strings here?", "choices": ["baby brother", "parent", "dog", "stranger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172618.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401230, "question_id": "UprFnr95VbH7faMqvrwP5R", "question": "What type of animals are shown?", "choices": ["domestic", "aquatic", "stuffed", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504395, "question_id": "Uq4FsRfrbJ5c5yXCyEELJk", "question": "Which object would move the pointer on the monitor?", "choices": ["mixer", "circle mouse", "chair", "keyboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504395.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452469, "question_id": "Us4BYvKDZ5rUsgqwkRLiwS", "question": "What is this man dressed for?", "choices": ["business", "swimming", "exercise", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558004, "question_id": "UtXfRCZqNs7JDsp3Wk4TZE", "question": "What type of flooring is shown?", "choices": ["tiles", "marble", "hardwood", "carpeted"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558004.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172098, "question_id": "UuUtkKp8MNMSxWGg4j34Kk", "question": "Which of the produce items would be unpleasant if eaten by itself?", "choices": ["banana", "carrot", "lime", "apple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172098.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561420, "question_id": "UvZJdSoCDwRYCRNbG5etiS", "question": "What type of transportation is shown?", "choices": ["air", "rail", "water", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561420.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 148513, "question_id": "UwVxmnwXUK3gACQPKrwYj8", "question": "Which animal is the youngest?", "choices": ["front", "back", "middle", "same age"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000148513.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379752, "question_id": "UwidHGSBU33mjJirpDYG8L", "question": "What is missing to the left of the number eight on the keyboard?", "choices": ["power button", "mousepad", "key", "spacebar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379752.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381111, "question_id": "UxQ8YWQMgRyGECeKvvddqA", "question": "What body part do you use here to flush?", "choices": ["rear", "finger", "foot", "hand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126453, "question_id": "UxjSiUiWLp9z7m6iQfLUrp", "question": "What is the purpose of the brown object at the top of the mans pants?", "choices": ["add visibility", "provide protection", "secure pants", "provide warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000126453.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558691, "question_id": "UzpkbjUpfoBeRJq2u5aMwZ", "question": "What demographic of people would this room most likely be used by?", "choices": ["young adults", "babies", "seniors", "middle aged"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000558691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142219, "question_id": "V2YQLZ5DwqW9pfPKuHqaMU", "question": "What do they mainly fly from place to place?", "choices": ["packages", "groceries", "people", "animals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142219.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 455850, "question_id": "V2aZ558hqdaN49D3QzuUZa", "question": "From which animal is the main meat topping shown here originated?", "choices": ["cows", "pigs", "goats", "buffalo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000455850.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439070, "question_id": "V2dJ2T79dvWBVKejDNNJTR", "question": "What one of these foods is often the filling for this type of delicacy?", "choices": ["kiwi", "eggplant", "corn", "apple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 247118, "question_id": "V3VqPRY3v643GkDWwuaXvR", "question": "What does this man seem to be using the brush as?", "choices": ["mirror", "microphone", "musical instrument", "something edible"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000247118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 247109, "question_id": "V44JXw3uyp9pPnJmSgYFzs", "question": "What is the possible reason the man is wearing the hat?", "choices": ["warmth", "uniform", "collision protection", "sun protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000247109.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377228, "question_id": "V54Q4xu62PnB76XtDbW54F", "question": "What is the name for a juvenile of this animal?", "choices": ["kitten", "puppy", "kid", "calf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464852, "question_id": "V5pJQQDkUy6jDzHnbEco8h", "question": "What is near the helicopter?", "choices": ["snowboarder", "rainbow", "paratrooper", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464852.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182694, "question_id": "V6DUUeozi4Zvamq2Yffbk2", "question": "What character wears a similar color pants to the man?", "choices": ["leprechaun", "headless horseman", "wendigo", "yeti"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000182694.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316046, "question_id": "V6rkjo3u5fSecpxwdaPDnY", "question": "What body of water is the white board made to be used in?", "choices": ["lake", "ocean", "stream", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330949, "question_id": "V7Hkh3ikcPDymUHJLB56dR", "question": "What type energy fuels this conveyance?", "choices": ["electric", "gas", "human", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330949.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346760, "question_id": "V82ukCnuVt469YjCVUfSkK", "question": "What material is the vase made of?", "choices": ["clay", "ceramic", "rubber", "porcelain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279719, "question_id": "V8cQtJj9hYiQQ4kdVGhHsm", "question": "If you committed a crime at this corner how might someone find out about it?", "choices": ["child reporters", "video recording", "tattle tale", "rat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000279719.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189837, "question_id": "V8uEtgnNGguMyeYuZR3M5t", "question": "In which shire county does the bus travel?", "choices": ["east sussex", "avon", "cheshire", "hampshire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373907, "question_id": "V8viFrPS3US388vuR3FZTA", "question": "What style of shoe are the women wearing?", "choices": ["sneaker", "boot", "loafer", "sandal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461852, "question_id": "V9minrKAi7JkoKb4bhzgX3", "question": "What movie was about this animal?", "choices": ["free willy", "seabiscuit", "jurassic park", "benji"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461852.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77289, "question_id": "V9tcyr5AGgkzjgtHUfDKMy", "question": "What item is added to the black circle on the side of the vehicle?", "choices": ["gasoline", "wax", "plastic", "batteries"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178102, "question_id": "VA7G7PQtmrcbwaMqVWnyZ2", "question": "What is this animal trying to do?", "choices": ["swim", "sleep", "attack", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178102.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53997, "question_id": "VAmCK5qfFSwcvG27SgZyoo", "question": "What gives the orange vegetable its color?", "choices": ["chlorophyll", "murex", "melanin", "carotene"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053997.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503377, "question_id": "VB8kPLSB854PBbgtwGV9Z4", "question": "What is the name of this toilet design?", "choices": ["wall hung", "p trap", "parryware", "floor mounted"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000503377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 348548, "question_id": "VBoijPpCjzqmGqou9mmMVA", "question": "This device heats using what?", "choices": ["coal", "fire", "oil", "radiation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000348548.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155635, "question_id": "VBvhjQcHV4WnXqimsT6bks", "question": "Which activity is only one person doing in this part of the ocean?", "choices": ["boogie boarding", "paddle boarding", "surfing", "skim boarding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155635.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 489532, "question_id": "VCMPFZpbwxx7frzYEYBqRa", "question": "What would cause the person to do this with the suitcase?", "choices": ["fell over", "humor", "lost items", "exercise"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000489532.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35877, "question_id": "VDCoYUHakL6zPXRsc8RKYp", "question": "Who defaced this sign?", "choices": ["glen", "quinn", "nancy", "no one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035877.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170433, "question_id": "VDQ4jRyKvndsMGqnQR4xuk", "question": "What are all the wires for?", "choices": ["tv", "computer/electronics", "air conditioner", "appliances"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401356, "question_id": "VDUEW83b6SZc2SJdvhAGFE", "question": "What celestial body does this person face?", "choices": ["venus", "sun", "mars", "moon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401356.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281252, "question_id": "VDfjJoNTWRoZEK5ZpXiniw", "question": "What show is this cat watching?", "choices": ["kitkat club", "pussy galore", "james bond", "none"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000281252.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 282637, "question_id": "VDhuoVVjywW2BeYL2nfa6s", "question": "In case of gas shortages what would the occupant here utilize to get around?", "choices": ["van", "bike", "suv", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000282637.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127487, "question_id": "VEGMmwSTH6HtbxmqtJKAZP", "question": "Where is this plane most likely stationed at?", "choices": ["china", "japan", "turkey", "usa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127487.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163373, "question_id": "VHEai6iZCHmESdLTjeGKgT", "question": "What is the purpose of the columns?", "choices": ["separate sections", "decoration", "runs clock", "hold roof"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163373.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478777, "question_id": "VJE98ipwoUrt5GbFNZVXni", "question": "Which item here is particularly long?", "choices": ["baseball bat", "beak", "cucumber", "toothpick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267809, "question_id": "VJzrTaxwLfG57v5Ds7qjBa", "question": "How were the hard candy items atop the doughnut here affixed there?", "choices": ["stirring", "by dipping", "elfs", "melting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31198, "question_id": "VLWTs3AoC99Ut9pMkyyhVH", "question": "How many birds are on the log of wood?", "choices": ["three", "four", "seven", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45233, "question_id": "VMDmPLWfY4QU2LzbuxG3kD", "question": "Who might sleep in the small bed here?", "choices": ["mom", "father", "pet", "mother"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045233.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232006, "question_id": "VNhsBPwfnuPnNDrxG7wXtG", "question": "What human like expression might this animal be able to carry out?", "choices": ["math", "scheduling", "geometry", "speech"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232006.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416173, "question_id": "VNp34QGHz7U8pwagr7pwsx", "question": "How many animals are sitting on the neck of the large spotted animal?", "choices": ["two", "three", "one", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000416173.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249300, "question_id": "VPFMYHXzrDEaHscyYLRgo4", "question": "Why is the other bird not joining the one in the feeder?", "choices": ["not hungry", "is sleeping", "different species", "is afraid"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000249300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579763, "question_id": "VPWGnzXxCgrhWwFAeKnWJw", "question": "What type of crust is shown here?", "choices": ["deep dish", "thin", "raw", "stuffed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579763.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84079, "question_id": "VPYGav44Fv9gTpWFTMqLZ8", "question": "What is touching the stuffed animal?", "choices": ["plant", "cake", "baby's finger", "woman's toe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513223, "question_id": "VPvE84r5DdBbqGq9nJRt28", "question": "What are the three zebras in the foreground currently doing to nourish their body?", "choices": ["drinking", "grazing", "cleaning themselves", "hunting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513223.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357645, "question_id": "VQ9GcN4nHMSzCpLSBxcyxB", "question": "What company is known for using these vehicles?", "choices": ["mcdonald's", "ibm", "delta", "ford"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357645.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386903, "question_id": "VRCnn7AY4xwRx3FhVNsvKS", "question": "What kind of leaf is the same or similar to the ones sticking out?", "choices": ["burch", "lilies", "grass", "cheese cloth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386903.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290534, "question_id": "VRjzCjT3bD4cPH3Zt42QKe", "question": "What tool has a black handle?", "choices": ["spatula", "scissors", "cutting board", "pizza cutter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290534.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511864, "question_id": "VRsvEhP3hWY6kwNtGSdY7w", "question": "What type of surf board is the woman holding?", "choices": ["gun", "shortboard", "longboard", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522648, "question_id": "VShRYH6oWMiADbPDSsM3QJ", "question": "What name is given to the person playing this type of sport?", "choices": ["attacker", "player", "celebrity", "surfer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 531231, "question_id": "VSq8CB9MWe38roB5ZWqX4V", "question": "Which visible item is likely to stretch furthest?", "choices": ["paper", "rubber band", "phone", "recorder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000531231.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226098, "question_id": "VUyQTcGkLbRpz8zjS5W29v", "question": "What does the N stand for?", "choices": ["nebraska", "namibia", "nippon", "netherlands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226098.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325174, "question_id": "VVUu89DKar9tWQGGhBw9Ui", "question": "Which of these movies feature this animal in its main cast?", "choices": ["madagascar", "shrek", "finding nemo", "toy story"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000325174.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243905, "question_id": "VW3L4ZN3zmVLzKbTdFttTy", "question": "What type of coat coloration does the cat on top of the luggage possess?", "choices": ["tabby", "tuxedo", "solid", "pointed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243905.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 359199, "question_id": "VWFsza6idQurNDw3GcgjeV", "question": "What appears to be enmeshed in glass here?", "choices": ["ice pick", "straw", "comb", "toothbrush"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000359199.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468766, "question_id": "VXFvEgSK4AG5ddgnax7eDr", "question": "The zebras are standing on what?", "choices": ["water", "benches", "boxes", "tables"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 233605, "question_id": "VXhkuREYyfri63Go5vKqAm", "question": "What might the baby get in his mouth?", "choices": ["toothpaste", "dirt", "cloth", "hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000233605.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263164, "question_id": "VYqwkmSgtG2ckcor98997W", "question": "What is a famous sports ground where this is played?", "choices": ["wimbledon", "oval", "wembley", "yankee stadium"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263164.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366837, "question_id": "VarwFJ9MxJBdUY5fx7euKn", "question": "What type of bridge is this?", "choices": ["arch", "beam", "cantilever", "truss"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 140784, "question_id": "VbiQoRPfbYPy8WGedjDpDc", "question": "What is this dog trying to do?", "choices": ["drink", "eat", "watch", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000140784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422034, "question_id": "VcCGsE7RGds83wxE7ThJvh", "question": "What is the giraffe sitting in?", "choices": ["dirt", "concrete", "sand", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000422034.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 383203, "question_id": "VcCvhUEmDsKHUJimNvcbWx", "question": "The blanket features the name of a famous what?", "choices": ["author", "politician", "singer", "designer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000383203.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544192, "question_id": "VcP6pSDjFRqbGi4pFN86bj", "question": "Other than divers what is the style of eating these creatures do?", "choices": ["chewing", "tearing", "dabbling", "pecking"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000544192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212325, "question_id": "Vcov3zW5ihRwSiU4dUjqWK", "question": "Why is he wearing gloves?", "choices": ["grip", "fashion", "warmth", "germs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000212325.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222162, "question_id": "VddAuiW2E4YUwY4fknpSaz", "question": "What activity is being shown here?", "choices": ["drinking", "cooking", "eating", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318236, "question_id": "VegzaLHYgMiTSKCLxkyxNk", "question": "The location in the image is most likely in which part of the U.S?", "choices": ["south west", "central", "north east", "south"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318236.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326675, "question_id": "VeiRfiQjPSrv2ABYzRLuZG", "question": "What type of food is shown?", "choices": ["burrito", "burger", "pizza", "taco"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326675.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487372, "question_id": "VfYLh4HVaq2ZhAWdXGM2v2", "question": "What might prey on these creatures?", "choices": ["dogs", "eagles", "hippos", "lions"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487372.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250573, "question_id": "Vfe4gJeJm28SnZGHNB7tG3", "question": "Which handle should one turn if they want luke warm water?", "choices": ["none", "both", "right", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000250573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303724, "question_id": "Vgohz3HyxM7KTN7SpyqLSP", "question": "What might cause the giraffe to put it's head near the measuring pole?", "choices": ["bright colors", "measure weight", "food smell", "human signal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428443, "question_id": "Vh35A2xdYoEvTxV538cCiv", "question": "Which feature seen here is often removed or absent from this type animal?", "choices": ["ears", "horns", "cuds", "eyes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550218, "question_id": "VhTMWJLBmS7e95wQK7GbmE", "question": "What is the status of this food?", "choices": ["bitten", "broiled", "diced", "uneaten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218803, "question_id": "VhagLghxm27KWWFLA9XKYt", "question": "What does the K stand for?", "choices": ["korean", "koninklijke", "kiwi", "kenyan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218803.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379236, "question_id": "ViDUWLYyHzkTkKeZUp6TZb", "question": "The man holding the green cup and wearing the black fleece jacket works in what field?", "choices": ["education", "finance", "medicine", "information technology"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379236.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548228, "question_id": "ViddNS7iaSKQ4ZhLBtvQZn", "question": "What are the Zebras touching?", "choices": ["noses", "hooves", "tails", "lips"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144041, "question_id": "VjdJnB7RQkNqi8NGbb8jih", "question": "What is she doing with the phone?", "choices": ["dialing", "texting", "posing", "video games"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000144041.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467402, "question_id": "VkYNp6FHGQuVPVcuaPDx4c", "question": "Food for these animals was placed here by whom?", "choices": ["themselves", "no thing", "their mothers", "owners"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000467402.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86120, "question_id": "VknJoWBTW7kztLzS2AHrH7", "question": "What is causing the baby cow to chew on the women's fingers?", "choices": ["fear", "anger", "flavor", "hunger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419654, "question_id": "VnLKkVkuwKqQDRagmJPqcC", "question": "Where is this bathroom located?", "choices": ["gas station", "home", "office", "store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000419654.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501878, "question_id": "Vnb2dKKwxTJ6a83xZtPYNc", "question": "What type of buildings are the ones in the top right?", "choices": ["airport terminal", "offices", "apartment complex", "hospital"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501878.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283235, "question_id": "VomK3tFW8JNmQhB8tQcHCm", "question": "What type of phone is on the left?", "choices": ["flip", "rotary", "corded", "smart"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291846, "question_id": "VorjiupkEQWfMfJeDHLciG", "question": "What sort of building does this clock likely rest atop?", "choices": ["court", "private home", "church", "ball arena"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291846.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 426017, "question_id": "Vpj42wHNZCKUSSHcau5m3E", "question": "The family this street is named after were well known for their what?", "choices": ["beauty", "tavern", "cruelty", "orchard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000426017.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 523395, "question_id": "VpvWJ5TEty9CaeUwTZGMJz", "question": "On which continent does this conveyance travel?", "choices": ["north american", "europe", "asia", "antarctic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000523395.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197730, "question_id": "VqTGqnUZodUddeTnnoEAPG", "question": "What type of area is shown?", "choices": ["rural", "arctic", "seaside", "urban"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000197730.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561740, "question_id": "VqvphwbAhgQiF47MCnt3ku", "question": "What is the best time to eat the following food?", "choices": ["supper", "any", "breakfast", "lunch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561740.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422372, "question_id": "VruckMMwcWbp3zzEXYaLy7", "question": "What sports team is named for this animal?", "choices": ["cardinals", "wolverines", "bears", "timberwolves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000422372.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311115, "question_id": "VsUUABosW8gtUiKTLvfK6f", "question": "Who would have to evade the giraffes?", "choices": ["predators", "incoming cars", "hurricanes", "incoming boats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311115.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32152, "question_id": "VsWDvM8pgJPQ9wq8zftXWA", "question": "What might make this bathroom embarrassing to use?", "choices": ["floor", "walls", "shelving", "windows"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032152.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548553, "question_id": "VsX6jkdXnde5JcAxx6kP38", "question": "What kind of fuel does this aircraft run on?", "choices": ["gas", "vegetable oil", "jet fuel", "kerosene"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548553.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122078, "question_id": "VspwycfrFsCbQ8KyhLb2c2", "question": "What is at the bottom of the hill?", "choices": ["airport", "amusement park", "lake", "town"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35162, "question_id": "VszXhvG8oviapRZiyvqKnW", "question": "What invention made these items more rare?", "choices": ["cotton gin", "printing press", "steam locomotive", "microwave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525974, "question_id": "VtbbdUHRhBnKQJgD78zJ8W", "question": "What type of vehicle is the person sitting on?", "choices": ["van", "truck", "car", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525974.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97554, "question_id": "Vu5DxUfHJH8m7s2wMMpctr", "question": "The television company got its start in what industry?", "choices": ["cars", "computers", "radio", "telescopes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097554.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385200, "question_id": "Vv4SguqbNPdP4pRT3gfwjr", "question": "Which color would most likely cut the others here?", "choices": ["white", "blue", "tan", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385200.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490565, "question_id": "Vv5YCbL9KygPNDc8PqhpaH", "question": "Which chess piece does the logo on the sign look like?", "choices": ["knight", "rook", "queen", "king"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2991, "question_id": "VvtZmRVWHLPC35TWB3mBNB", "question": "What type toothbrush does this lady own?", "choices": ["charcoal", "electric", "twig", "manual"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102477, "question_id": "VwGCEQatbkimJURFWxqXLC", "question": "How is this toothbrush powered?", "choices": ["hydropower", "manual power", "battery", "solar energy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102477.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521330, "question_id": "VwsVoGCHiMtVuPVUBguqs7", "question": "What is the company whose logo appears on the jacket known for?", "choices": ["wheelbarrows", "cheese", "phones", "desktop computers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521330.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297057, "question_id": "VwyPZtB6XiVCx5gQbeGrtD", "question": "What additional equipment is needed to play this game?", "choices": ["croquet mallet", "tennis racket", "pool cue", "bowling ball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 216318, "question_id": "VxcCVKWK3JEFoENtfVC6od", "question": "Which of these lanes should ideally be utilized to pass either silver car shown here?", "choices": ["rightmost", "none", "leftmost", "center"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000216318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329210, "question_id": "VxiW8A682vRChypsXeEBTX", "question": "How many daily trips does the plane seen here?", "choices": ["three", "none", "one", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329210.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258162, "question_id": "VyVq5dForFLtwBatyLDYzb", "question": "What does the apparatus on the ceiling provide besides cool air?", "choices": ["surveillance", "light", "heat", "protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000258162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514286, "question_id": "VzkWG5hqQUA6RNfAjwM3ti", "question": "What is the weather at this location?", "choices": ["mostly cloudy", "partly cloudy", "clear", "overcast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325117, "question_id": "W3GBtVcaAYwGVBhR2MptFe", "question": "The animal the cat is looking at is in what class of animals?", "choices": ["mammalia", "aves", "amphibia", "reptilia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000325117.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3407, "question_id": "W3gnm2Lgw2jzDznN3UvwXh", "question": "The amount of forks on the table indicate there are how many people eating the cake?", "choices": ["two", "three", "one", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003407.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 94641, "question_id": "W4SAcFNmWmWFgNn35r7xNR", "question": "Besides bathing what is the bathtub in the room used for?", "choices": ["plants", "storage", "fish", "dishes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000094641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229375, "question_id": "W5KRNjNMUMqya77a2eToE2", "question": "What is the word for the singular form of the markings found here?", "choices": ["graffito", "vandal", "urban", "signs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339642, "question_id": "W6JjvHLpEBQHyAP5N9j4T9", "question": "What kind of cat is this?", "choices": ["orange tabby", "ocelot", "calico", "siamese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339642.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9676, "question_id": "W6TstdtU4aWCWmdLigKRes", "question": "What skills would one need to qualify to sit in this chair?", "choices": ["spelling", "running", "swimming", "gymnastics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009676.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93943, "question_id": "W6qqj7MEAFKZAzYXaSdUTX", "question": "What style of shirt is he wearing?", "choices": ["muscle shirt", "hoodie", "t-shirt", "cardigan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337243, "question_id": "W6tXvFS8kGqZ6iFzE5pwiX", "question": "What type of animal is this?", "choices": ["arctic", "domestic", "wild", "aquatic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20197, "question_id": "W78WD8DGHoeT4Qjjs6BF3Y", "question": "What feature are these animals known for?", "choices": ["stingers", "web spinner", "talons", "tusks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020197.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578799, "question_id": "W7Q5vuqhWUCoQTzXHpXxzH", "question": "At which location is the skateboarder skating?", "choices": ["skate park", "mall", "city hall", "parking lot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578799.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552429, "question_id": "W8v97FAkB5vftsPwwY5CZG", "question": "How has this food been prepared for serving?", "choices": ["scooped", "poured", "sliced", "diced"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463343, "question_id": "W9yXYkKYTp7WUBT6xnsdNQ", "question": "In what direction is the white SUV driving?", "choices": ["east", "south", "north", "west"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434218, "question_id": "WAJqnGfw2GLpNtmkiW4MLc", "question": "In which city is the tennis court located?", "choices": ["london", "melbourne", "sydney", "new york"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209406, "question_id": "WANvyjtWZf9fZ5DbV5L4o2", "question": "What leavening is used in this food?", "choices": ["butter", "none", "milk", "yeast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209406.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310650, "question_id": "WAnhMUquYiKfPHBKdQLPmP", "question": "What beverage is served at sports games observed or participated in by the riders on this bus?", "choices": ["milkshakes", "milk", "gatorade", "hot toddies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310650.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555353, "question_id": "WC7JQiZLCiMw6VTQiY37d4", "question": "What is required for this activity?", "choices": ["wind", "rain", "sun", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108771, "question_id": "WCdfmCpSzCoXXJh8BxSDNM", "question": "How many times a day should people ideally use these?", "choices": ["four", "ten", "none", "twice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108771.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122078, "question_id": "WD2EBL2yRonzW9HHLujAW6", "question": "The U.S. state for this location is most likely in what region?", "choices": ["south", "central", "north east", "south west"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 237585, "question_id": "WDY3JDuNwkryMsQoMdZdaK", "question": "What kind of communication is on the sign in the background?", "choices": ["psa", "directions", "wanted poster", "advertisement"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000237585.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251426, "question_id": "WDqNNS7cWMUG5tdhUUgbsa", "question": "In which country is this sign for camping located?", "choices": ["austria", "germany", "france", "belgium"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251426.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436364, "question_id": "WDw9ebNnS6iHqSsWoDkvWK", "question": "Why is he in midair?", "choices": ["just jumped", "falling", "slipped", "bounced"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477254, "question_id": "WDzLW7hiTLnD7B6m6wMjmm", "question": "How does a young child usually refer to the item that is white and is against the right wall and on the floor?", "choices": ["dada", "baba", "moo moo", "potty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477254.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442920, "question_id": "WEeYULCFmX3adogp5xgf5L", "question": "What is the one really odd object to be in the bathroom?", "choices": ["camera", "wallpaper", "apple", "shelf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442920.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262881, "question_id": "WFDnvReVWv58pmMbwuehVL", "question": "What is this plane used for?", "choices": ["military", "shipping", "passengers", "testing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000262881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119396, "question_id": "WHjgEYNi6bwuVUUk7me8aH", "question": "Which part of this sign was not officially placed there?", "choices": ["red background", "eating animals", "fastners", "stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199521, "question_id": "WJSJKrrv6GUGH7pjcmXmaZ", "question": "Which movie studio sponsored the event?", "choices": ["warner brothers", "disney", "dreamworks", "paramount"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25599, "question_id": "WPMPZPbTYoPTyr7VKgZtrr", "question": "Which way are the two zebras heads pointing?", "choices": ["left", "down", "up", "right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025599.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 280285, "question_id": "WPRABmS6zjcY9FdnF4bZUF", "question": "How are these two people in white shirts related to each other in this situation?", "choices": ["classmates", "coworkers", "teammates", "lovers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000280285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410026, "question_id": "WPWxPSiEHRrCSydoWmfCzo", "question": "Where did the main meal item live before it was harvested?", "choices": ["air", "water", "tree", "land"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410026.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87523, "question_id": "WPy2jA8aitpGEuKpRhqpRs", "question": "What is keeping the bear from freezing while laying on the snow?", "choices": ["wind", "fur", "skin", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087523.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 526379, "question_id": "WQBxZrNeeTrEBN3C6zb7P8", "question": "What is the job of the person loading this luggage?", "choices": ["handler", "captain", "cashier", "pilot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000526379.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 116346, "question_id": "WQXxMAQbn5Z3MkWNmaL2up", "question": "The batter for the first item is made from what?", "choices": ["flour", "grits", "almond flour", "cornmeal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000116346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36538, "question_id": "WSDBiN3MuwJMjtSJCoAR3k", "question": "What kind of street location are these signs at?", "choices": ["overpass", "highway", "intersection", "underpass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302158, "question_id": "WStsve46o2vVeLcaiucPXJ", "question": "What is likely transferred in what the bird is sitting on?", "choices": ["satellite signal", "electricity", "water", "oil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302158.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26772, "question_id": "WT4wsgXfHeMKY9bPjCbNnh", "question": "How many devices are on?", "choices": ["three", "none", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026772.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518864, "question_id": "WTAKhzvnzKz74jXNNW6BHR", "question": "What vehicule is this man waiting for?", "choices": ["bus", "boat", "train", "plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534059, "question_id": "WTPi24dzCqs85v7JDCsFUw", "question": "What is the name of the following bird?", "choices": ["flamingo", "seagull", "crow", "egret"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369724, "question_id": "WTnkSvQdRi8sLJLowVPLMe", "question": "What is the sink made from?", "choices": ["plastic", "glass", "ceramic", "steel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443559, "question_id": "WTuxfayaYsFTAE3YPDhiDP", "question": "What is causing the reflective look in the middle of the picture?", "choices": ["wind", "glass", "paint", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443559.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364231, "question_id": "WW6wAqwCAJCF7LbhWCy75r", "question": "Why are the zebras all headed in the same direction?", "choices": ["seeking food", "lost", "being chased", "protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000364231.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47079, "question_id": "WW7nVhnqHTBwYYDXLGnPzn", "question": "What service do these people work for?", "choices": ["military", "fire", "police", "paramedic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000047079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409696, "question_id": "WWsCCddQXGSQhZbauNR3Wb", "question": "The child here might inadvertently turn on what here?", "choices": ["garage door", "fridge", "tv", "pager"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409696.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325772, "question_id": "WXTL9XeYKzcaFvaNjDsPYJ", "question": "What shape is the vase?", "choices": ["oval", "triangle", "square", "rectangle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000325772.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560979, "question_id": "WYCdM45aPybTLpuMkdv3CL", "question": "How many subspecies of this animal are there?", "choices": ["nine", "six", "two", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560979.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385887, "question_id": "WYfFrczxnMvRDwVpvnmApo", "question": "What part of the plane is facing the man?", "choices": ["passenger windows", "wing", "nose", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385887.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544885, "question_id": "WZ6RH6qVC6psbJmxhofAA5", "question": "South Western Railways 707 is belongs to?", "choices": ["british", "china", "us", "france"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544885.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196884, "question_id": "WZ8bFyanyumQmoCkJq4BFB", "question": "What does the advertiser hope people will choose?", "choices": ["their airplane", "their supermarket", "their hotel", "their cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196884.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379899, "question_id": "WZBH4MMqcTNbdbeDm8RveG", "question": "What type of transportation is shown?", "choices": ["air", "water", "land", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379899.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549078, "question_id": "WZPYiMgbFxA6Du38sVDcVy", "question": "What is the likeliest location of the elephant?", "choices": ["zoo", "desert", "tundra", "savannah"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288296, "question_id": "WZZPbyRyzUePxHLHMFkavA", "question": "Where did the song referenced peak on the Billboard Hot 100?", "choices": ["four", "ten", "eight", "one"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000288296.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200364, "question_id": "Wasm9KUVuewPrBXpmSyNDw", "question": "Why is she wearing the same color?", "choices": ["in hospital", "stolen", "is mother", "found shirts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445757, "question_id": "WdGhkmhvqi3gCGz3QNorag", "question": "Why is the flower in the vase?", "choices": ["fell there", "hides vase", "display", "planted there"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000445757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176990, "question_id": "WeN5P8BFPjVnqSKf235jBd", "question": "This truck specializes in food from which region of the world?", "choices": ["mediterranean", "south american", "polynesian", "caribbean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176990.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87310, "question_id": "WewL6FEw7mQgSPX6AC3zyp", "question": "What club is the jersey wore by the person above belong to?", "choices": ["barcelona", "leipiz", "real madrid", "manchester united"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087310.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417255, "question_id": "WfEUSqa5ERywkhShYmffcF", "question": "Where is the snow located?", "choices": ["tree", "car", "cat", "roses"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417255.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87424, "question_id": "WfiJN5fiZ5zHbbuPy9WncW", "question": "What part of this bear is doing most damage to the white disc?", "choices": ["tail", "teeth", "paws", "nose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087424.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505434, "question_id": "Wg69H5TANvTjmMCV4mBGRA", "question": "What type of phone is being used?", "choices": ["cellular", "pay", "rotary", "landline"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000505434.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 149859, "question_id": "WgEtXULAFdFxV3uw7qzAAo", "question": "What type of area is he in?", "choices": ["rural", "desert", "urban", "forest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000149859.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418689, "question_id": "WgiCukg5sd7BuedBowiAiL", "question": "Persons here wear vests of yellow for what reason?", "choices": ["visibility", "none", "security", "fun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418689.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74763, "question_id": "Whmj3oaLUVgmDTDK2hkqox", "question": "This man looks most likely to work where?", "choices": ["museum", "butcher shop", "lifeguard station", "fire department"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074763.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53667, "question_id": "WhuERMZa96Jb4fEieW62yb", "question": "When was the last time this airline had a fatal crash?", "choices": ["1977", "1995", "2005", "1985"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053667.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518641, "question_id": "WjoBAyhAfhCX8xNJ8XhHAG", "question": "What is visible on the animal's feet?", "choices": ["claws", "fins", "eyes", "hooves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342000, "question_id": "WjtxK2vBrinZnYBDWJhRxW", "question": "What is missing on the bed that is part of most beds?", "choices": ["mattress", "frame", "box spring", "comforter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342000.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16790, "question_id": "WkssU3FgYLrGKNLgkF6uRt", "question": "What is found on top of the elephants head?", "choices": ["hats", "sand", "grass", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000016790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7312, "question_id": "WmVtRsQpy667eahQsAurab", "question": "What kind of skiing is this?", "choices": ["downhill", "alpine", "extreme", "cross-country"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007312.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399056, "question_id": "WmYuMCofKkdBWzBRXrRBms", "question": "From what video game console system is the steering wheel in the basket used with?", "choices": ["sony playstation", "microsoft xbox", "nintendo wii", "nintendo switch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547019, "question_id": "Wnhr2z7NRNNtswKw5y8dbk", "question": "Where are these elephants located?", "choices": ["zoo", "wild", "museum", "circus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547019.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 220358, "question_id": "WoQUtntUbcyEpNp6Jfq2iY", "question": "This family of animals is called what?", "choices": ["canidae", "felidae", "anatidae", "bovidae"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000220358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168496, "question_id": "WoV25TJqp3Czdf9HY6eHBN", "question": "The spikes on the building ledge is to prevent what animal from being there?", "choices": ["birds", "cats", "bats", "mice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168496.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475192, "question_id": "WpGx8Qj98Uw2gYb54ggtpy", "question": "If it gets even colder what area will freeze?", "choices": ["turquoise", "brown", "blue", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000475192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443608, "question_id": "WpTBhyYaq9iBqJpyTddpt2", "question": "The resort advertised in blue is located in which mountains?", "choices": ["selkirk", "rockies", "robson", "logan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427973, "question_id": "Wq86AeHnqVMY4VAc6orjpR", "question": "The dark bars hanging on back wall are designed to hang what?", "choices": ["underwear", "soap", "towels", "snacks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427973.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536218, "question_id": "Wq8RoVRNJjBdiPF9s3vnox", "question": "What animals are looking at the ground?", "choices": ["pig", "rhino", "zebra", "camel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172972, "question_id": "Wqdn83MgjZcxJivTUyiThi", "question": "How many species including human are clearly visible here?", "choices": ["one", "two", "four", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172972.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548427, "question_id": "WrXuaJ5wBwqAR6q8xPKyeD", "question": "What is the main function of this room?", "choices": ["working", "sleeping", "eating", "watching tv"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17228, "question_id": "WsyD8VS5TnwxMhvC8hD3Qo", "question": "These animals have what feature?", "choices": ["spots", "antlers", "stripes", "tusks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179494, "question_id": "Wtau5oRi5ZQ6L9s4Pfjdj4", "question": "What kind of activity is this bird capable of doing?", "choices": ["hopping", "flying", "diving", "burrowing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179494.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575292, "question_id": "WxwxqYf2hwqCSaG82X2nYx", "question": "The hall of fame for this sport is located in what city?", "choices": ["baton rouge", "des moines", "newport", "frankfurt"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000575292.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465557, "question_id": "WypomVJyxacqwFA9vQkpKB", "question": "What is the style of home in the background called?", "choices": ["craftsman", "colonial", "victorian", "tudor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560406, "question_id": "WysKvjuD7N86GGUwbYDbT4", "question": "Who is transported in this vehicle?", "choices": ["police only", "army", "recruits", "detainees"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560406.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463957, "question_id": "WzBYFuh9DxbGLsoG5a63Rq", "question": "What is this dressed used for?", "choices": ["bride's maid", "sun", "wedding", "prom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463957.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411306, "question_id": "WzYSDw6mz5JiHhbc4qf9Tm", "question": "Which one of these would be useful to clean his socks?", "choices": ["broom", "vacuum", "steamer", "bleach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 236655, "question_id": "WziXHYVuwZeAkaRYM2pSpR", "question": "The color of her clothing and gear will help with what?", "choices": ["visibility", "protection", "speed", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000236655.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50719, "question_id": "X25ZEaeJFfEuPdG4EuW79F", "question": "What type of range is shown here?", "choices": ["mountain", "gun", "gas", "firing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050719.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146514, "question_id": "X36N2BRNjDGZ4tmmbXExNg", "question": "Why are they facing opposite directions?", "choices": ["fighting", "lost", "despondent", "winding trail"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000146514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320697, "question_id": "X3YE9NKKsY6RVp2bLQ2B2a", "question": "Why is he on the ground?", "choices": ["hungry", "resting", "injured", "fell down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554979, "question_id": "X3mrfvsmR2wg8f3FwAHUWe", "question": "What is next to the clock?", "choices": ["figurine", "salt shaker", "baby", "basket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000554979.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256321, "question_id": "X5qDSJu6chVZoUQSnpEcDQ", "question": "What are these animals doing?", "choices": ["running", "playing", "swimming", "fighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000256321.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389377, "question_id": "X65gvfRogrKf6BysitAncL", "question": "What is typically done on the food to the left to get rid of sand particles and other undesirable stuff?", "choices": ["flipping", "devein", "salting", "boiling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 241400, "question_id": "X6XpSRwMTYxR3vVLMytVmN", "question": "Who is Sinclair Lewis?", "choices": ["football hero", "sign maker", "author", "politician"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000241400.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485679, "question_id": "X7WUKVJQoiFCG5z3TYtfQ3", "question": "What kind of human is depicted on the vase?", "choices": ["weight lifter", "president", "priest", "soldier"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485679.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 359971, "question_id": "X8Uc9CCgfS3KMT8QX3Lx48", "question": "The photographer of this shot stands where while taking it?", "choices": ["board", "shore", "ship", "deck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000359971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561915, "question_id": "X8qZb24GMz4NxFePtiDuyR", "question": "What is the blue car near?", "choices": ["curb", "cat", "traffic controller", "feather"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561915.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90, "question_id": "X9WcfxdqzMD8oFwULfDv3B", "question": "What keeps this animal in place?", "choices": ["electric fence", "picket fence", "cyclone fence", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000090.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79894, "question_id": "X9xtzmJB678XdtvUVq8Tyk", "question": "What brand are the man's shoes?", "choices": ["puma", "nike", "reebok", "adidas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20353, "question_id": "XA3YtjhNLZWYijx2EnPhvW", "question": "Why does the woman have her arm out?", "choices": ["balance", "wave", "gesture", "reach"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000020353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368533, "question_id": "XAMyKT8fueigGdZiEvKGu9", "question": "What is needed for this activity?", "choices": ["shoe", "skate", "board", "ski"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271809, "question_id": "XAhfBThMX7G9ScMRYKeQtZ", "question": "What aspect of the first two animals seen here might cause a bloody wound more easily?", "choices": ["horns", "tail", "ears", "nose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171615, "question_id": "XAzJxrVqrDogJXP23str3e", "question": "What type of phone is being used?", "choices": ["pay", "rotary", "landline", "cellular"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171615.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195498, "question_id": "XB34YuBmhc74HrTtZZKKDQ", "question": "What is the name of the game being played with the chain hoop?", "choices": ["disc golf", "soccer", "basketball", "baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579128, "question_id": "XBe9vzgyv7FpamqcpxCo4X", "question": "What food comes from the place where this sport originated?", "choices": ["poi", "pizza", "sushi", "dim sum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 58128, "question_id": "XC2H3HApQwNVxji7VFWoKd", "question": "What do the silver metal pieces go into?", "choices": ["copy machine", "stapler", "printer", "hole punch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000058128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289265, "question_id": "XChhLYM5mxJ6nC5Dq7wYDw", "question": "This elephant under the man is found on which major continent?", "choices": ["asia", "europe", "australia", "africa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52245, "question_id": "XCiwEEseSkcsrhaDoxTxHW", "question": "What animals are known for being in this setting?", "choices": ["domestic cat", "mountain goat", "tiger", "eel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052245.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16649, "question_id": "XCx54xpfwFTeWs82XoJ6bx", "question": "How long can they last in this container?", "choices": ["few decades", "few days", "few months", "few centuries"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000016649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 571233, "question_id": "XEJZJaX5zAkiXh3ntswrMz", "question": "What propels this person forward best?", "choices": ["sun", "dolphin", "paddle", "boat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000571233.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105794, "question_id": "XFELGkLapk7oqnkY6QDRqg", "question": "Who manufactured this mouse?", "choices": ["hp", "compaq", "logitech", "dell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35362, "question_id": "XFR4dQt5Ror8Jh7kWm6xby", "question": "The animal is in what kind of an environment?", "choices": ["crowded", "desert", "watery", "snowy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035362.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271086, "question_id": "XHDEEj7cWApjc4pgCYXrXC", "question": "What kind of flooring is most abundant here?", "choices": ["vinyl", "hardwood", "carpeting", "tile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271086.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562829, "question_id": "XHuezeq6VNB3dQbzMnh3Ec", "question": "Where did the child get a booboo?", "choices": ["neck", "hand", "head", "arm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562829.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424195, "question_id": "XHyj9fDEFoZd2tCYdYVPmc", "question": "What kind of celestial object is visible on the sign?", "choices": ["moon", "jupiter", "earth", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424195.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256947, "question_id": "XKEJUgrb3agVVqjLf3G9Wm", "question": "What kind of ride is in this area?", "choices": ["ferris wheel", "carousel", "bumper cars", "roller coaster"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000256947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 261975, "question_id": "XKjGpsCw2rzotaXNVFaXYV", "question": "Which side of the plane does the photographer sit when facing the cabin?", "choices": ["left", "rightmost", "cabin", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000261975.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495566, "question_id": "XLZ7529bVC86Lv43Kn2rTR", "question": "What type of animal is on a leash?", "choices": ["tiger", "lion", "bear", "goat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80290, "question_id": "XMZZtJSsSNxepF8JEXri8K", "question": "If you need to blow your nose the tissue comes from what kind of container?", "choices": ["metal", "marble", "glass", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080290.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195933, "question_id": "XMnbKJgvx2zfdNsGQfGLRG", "question": "What is the temperature?", "choices": ["warm", "cool", "cold", "hot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286383, "question_id": "XMxmKhmw9LynSmZrYXUw8Z", "question": "What type of activity does the man have the gear to do?", "choices": ["sledding", "snowboarding", "skateboard", "kite surf"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000286383.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38607, "question_id": "XNqaPqJRoZPYeiKfrDxpnP", "question": "What rodent are the animal costumes here meant to represent?", "choices": ["moles", "skunks", "chipmunks", "ground hogs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038607.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442579, "question_id": "XP89oh7ytHg4Us3my5WhN3", "question": "What is the grey masonry in the room surrounding?", "choices": ["bookshelf", "entertainment center", "closet", "fireplace"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31701, "question_id": "XPF7cynH6BoH2GB88MoD2d", "question": "Which one of these is an ingredient that goes into making the container?", "choices": ["gold", "honey", "cotton", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031701.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431411, "question_id": "XPeQjGdSdoyVk3ePMq4vi5", "question": "What is the parent company that owns this bus service?", "choices": ["pennine", "centrebus", "trentbarton", "deutsche bahn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431411.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490115, "question_id": "XQ3Tcg34tGGGmiLrGqd6v8", "question": "The sign modification was made by a what?", "choices": ["vegetarian", "butcher", "zoologist", "rancher"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490115.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272537, "question_id": "XQZdQE8iA4KSr6j8TTjx6t", "question": "The white fibers visible on top of this blanket are what?", "choices": ["dog hair", "frost", "person hair", "cotton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272537.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576559, "question_id": "XSfzAoMtbCKR98YuJ8N5DS", "question": "What kind of flotation device is this man using?", "choices": ["survival raft", "surf board", "life preserver", "log"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576559.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289401, "question_id": "XSiSHWWJWAmH8XkkPynaww", "question": "What temperature might someone keep things here?", "choices": ["absolute zero", "cool", "boiling", "hot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289401.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252972, "question_id": "XULJvBK7v6Dy72TjP9MYdY", "question": "What proves the bus is behind you?", "choices": ["trees", "weather", "mirror", "sign"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252972.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85178, "question_id": "XV8aS76tyXM4gBagS4MRqL", "question": "The items on the plate next to the pizza and knife most likely resemble what?", "choices": ["mud", "ribeye steak", "rice ball", "eucalyptus leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085178.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162349, "question_id": "XWAJndMrgbqne85pUQLsUn", "question": "What type of location is this?", "choices": ["coastal", "rural", "arctic", "urban"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439599, "question_id": "XWnKmNwP4DejmddfJmiryX", "question": "What food comes from this animal?", "choices": ["snake skin", "burger", "lamb chops", "rabbit stew"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439599.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306916, "question_id": "XYPHsM57gAUgsKfYTnNRVB", "question": "What is the plane in all likelihood transporting?", "choices": ["passengers", "cargo", "weapons", "soldiers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306916.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490163, "question_id": "XYraYLgcKX3PmkA6qGzB3y", "question": "What is being balanced on this person's lap?", "choices": ["bag", "child", "laptop", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490163.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147295, "question_id": "XZtrRtXWoaxxH6pt2XVsoN", "question": "If they squeeze the fruit onto the food it will give it what kind of taste?", "choices": ["sweet", "umami", "bitter", "tart"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147295.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190701, "question_id": "XaKfeFQguGnhf9LcnWufkw", "question": "Why is the man wearing green?", "choices": ["halloween", "wedding", "fashion", "visibility"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190701.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 240499, "question_id": "XaeD8dWauTEmrR6qjgBRMp", "question": "What is being done to the baby animal?", "choices": ["testing", "feeding", "vaccinating", "collecting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000240499.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438644, "question_id": "XaupKF24ox4mfnxTKGZc3L", "question": "What is the train riding on?", "choices": ["water", "grass", "street", "railroad track"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438644.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350490, "question_id": "Xb5mwWngPzctVPseKbGD75", "question": "What number comes sequentially after the number on the blue item?", "choices": ["eight", "four", "ten", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350490.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184593, "question_id": "XbkPog4S85hLAdy4mA5Z29", "question": "Which one of the following would be a concern if one lived in one of the houses in the background?", "choices": ["loneliness", "noise", "price", "boredom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184593.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 361466, "question_id": "Xc6jdMzsEL4U3ME3PDCdLZ", "question": "From what did the Giraffe get stains on their front knees?", "choices": ["dribbling", "kicking", "kneeling", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000361466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309624, "question_id": "XciCrNkaTSrARgzoGHbdFT", "question": "Why is the bus parked?", "choices": ["no gas", "flat tire", "lost", "awaiting passengers"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000309624.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 149082, "question_id": "XeM3hdSMhcvNvkBaiU9np6", "question": "What is usually found in this kind of location?", "choices": ["stalagmites", "snow", "telephone poles", "seashells"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000149082.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346020, "question_id": "XedeQTXg9447uzDXJjBqs8", "question": "What type of suit is worn here?", "choices": ["fireman", "wet", "business", "police"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346020.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 480998, "question_id": "XfoDLYpg4PWGLuhCviJgBF", "question": "What type wheels does the conveyance shown here roll on?", "choices": ["rubber", "metal", "none", "plastic"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000480998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556664, "question_id": "Xg73UMHSmSXNRHZc3CRXqB", "question": "Why is one zebra smaller?", "choices": ["not eating", "sickly", "is child", "mutation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207495, "question_id": "XgCgWGbYzpSJPWLrE7eNAK", "question": "What is the name of a famous cartoon character of this type?", "choices": ["donald", "garfield", "pluto", "yogi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207495.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152697, "question_id": "Xgu2HCq7ZTn5ojHWyb27DB", "question": "What time is shown here?", "choices": ["90019", "44500", "91900", "124519"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163121, "question_id": "Xj5B5VaqDuz3TNaVzVgBSE", "question": "What is the actress original last name?", "choices": ["scicolone", "russo", "bianchi", "ricci"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163121.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511581, "question_id": "XjZS6jcKYFzBFsWBsqh4RL", "question": "What is he doing?", "choices": ["braking", "hiding", "stalling", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551415, "question_id": "XmKj2MPKghzYKdMMcNzHcd", "question": "What is the bird doing?", "choices": ["sleeping", "hiding", "resting", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115120, "question_id": "XmTk7g8by6rHZkYys4Jo9p", "question": "What is near the person?", "choices": ["snow", "dogs", "goats", "rabbits"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433514, "question_id": "XnYEdi6PHPmLtbqJTE9mXt", "question": "How is a group of these animals called?", "choices": ["nest", "dazzle", "school", "herd"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197333, "question_id": "XoZ3FuSxQrVhMPuX8xyHyQ", "question": "What company makes these kind of items?", "choices": ["black decker", "mcdonald's", "huffy", "pier 1"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197333.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79078, "question_id": "Xp2HW6XKjEsD9WrqmeA8Fg", "question": "From which direction must ongoing traffic stop when entering this intersection?", "choices": ["none", "it depends", "all", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80174, "question_id": "Xq9WK2SvE9MNxyLFxuzmqF", "question": "What was she doing with her hand?", "choices": ["slapping giraffe", "feeding giraffe", "shoving giraffe", "opening window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080174.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365046, "question_id": "XqhMQLna6sWxtfTkZLDNco", "question": "Why are the trains different color?", "choices": ["different lighting", "different owners", "different styles", "wrong tracks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16686, "question_id": "XrWToY8vMwmdXuf27FU9pV", "question": "What is most likely behind that curtain?", "choices": ["bed", "shower", "toilet", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000016686.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63957, "question_id": "XraDX83QiHbwMamxxmYJ2M", "question": "For hygiene purposes what should they do before eating off the plates?", "choices": ["wipe them", "wash them", "nothing", "inspect them"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063957.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125592, "question_id": "XsF3EDSjjZWY6QknteTzP6", "question": "The person who decorated this house is a fan of what show?", "choices": ["dr. who", "star trek", "stargate", "x-files"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125592.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123313, "question_id": "XsULgrzKhH5VnbPiDLRj62", "question": "What energy drink is advertised on the shirt?", "choices": ["red bull", "gatorade", "rockstar", "monster"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123313.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360413, "question_id": "XtQ4Q8qfaBf2pXV8rgb2Hi", "question": "What might someone have touched before leaving prints on the ground here?", "choices": ["hair", "dogs", "fireplug", "clouds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000360413.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325926, "question_id": "XtUVmrHDTnpet8NVR8T7ak", "question": "Where is the bus?", "choices": ["garage", "city", "house", "park"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000325926.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153801, "question_id": "Xu3cmHVMex2MiePs2WqbXQ", "question": "What island is this bus on?", "choices": ["oahu", "madagascar", "sicily", "easter island"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153801.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19792, "question_id": "XumdGZDaoZ8rMnkqVm3fSz", "question": "What kind of surface is the dog resting on?", "choices": ["mattress", "leg", "couch", "carpet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019792.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15958, "question_id": "XvXjLJdRz3PKDuz65qRZZ2", "question": "What is near the stores?", "choices": ["man", "bicycle", "car", "cow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015958.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57909, "question_id": "XxDkUi3WximhsymY8aiL6J", "question": "In which country are these double yellow lines by the curb found?", "choices": ["australia", "united kingdom", "united states", "germany"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000057909.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328258, "question_id": "XxSjHWiHWJzNp2jhdFdQ2f", "question": "The person taking this picture got here how?", "choices": ["uber", "scooter", "large cow", "small cow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328258.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177497, "question_id": "Xyfg7H6D8snV3QJTwse2Dm", "question": "What is the name of a famous vehicle of this kind?", "choices": ["thomas", "ben", "andrew", "bill"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177497.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557674, "question_id": "XyiGft2DymP86hPLi3gHta", "question": "What is this bear trying to do?", "choices": ["bathe", "hide", "close", "open"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557674.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304515, "question_id": "Xznifk6qCjrXsDupDT6f2b", "question": "What items are bunched together here?", "choices": ["luggage", "bananas", "streamers", "weapons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304515.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277863, "question_id": "Y2gdd2Z6BVmt76XmLafc8c", "question": "What type of animals are swimming in the water?", "choices": ["shark", "fish", "whale", "swan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 430909, "question_id": "Y3jGyxvqzkX3bGNUJJYqw3", "question": "What is a way we can describe the animal here?", "choices": ["primate", "feline", "amphibian", "canine"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000430909.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575195, "question_id": "Y3mLF4QrKSdLM9ntag4ogi", "question": "Why is he standing like that?", "choices": ["maintain balance", "is old", "is afraid", "falling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575195.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 305553, "question_id": "Y4ANFH5Qpo7EBuV8KftfKM", "question": "What is the tallest thing in the picture?", "choices": ["giraffe", "fencing", "trees", "fence post"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000305553.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235232, "question_id": "Y4YCzQmUVasC3zirFgsQTc", "question": "Which one of these restaurants is known for selling this style of food?", "choices": ["taco bell", "quiznos", "starbucks", "burger king"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000235232.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279709, "question_id": "Y5B4sWuoG4L8H4MLyvocua", "question": "What is the young man in black shorts trying to catch?", "choices": ["fly", "football", "baseball", "frisbee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000279709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 72506, "question_id": "Y5ZEjJn6Qy5itmoYGcnzmA", "question": "Based on the size where would someone find this type refrigerator?", "choices": ["dorm room", "house", "break room", "mansion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000072506.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230180, "question_id": "Y8GhYxetAJBAAbxUEvT866", "question": "Regarding the ad seen here if you try one today how likely are you to win 4 million pounds?", "choices": ["25% possible", "100%", "very unlikely", "50% odds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230180.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124519, "question_id": "Y8YgSFu4GQrurKhSdi6Vaw", "question": "Where might this person be soon?", "choices": ["sky", "lake", "farm", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124519.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 203934, "question_id": "Y8hqm9xHMqQhCKBsL2rR4r", "question": "This beverage company was started in what country?", "choices": ["portugal", "sweden", "guam", "namibia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000203934.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310816, "question_id": "Y8qf47qTYYFwCgyx95C8eD", "question": "Why is her hair such a bright color?", "choices": ["dyed", "paint", "sick", "natural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310816.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550884, "question_id": "Y9qeMQ3bDqCgzQHDGmhkqW", "question": "Why would a human bring this horse to the beach?", "choices": ["graze", "drink water", "sleep", "recreation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550884.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304234, "question_id": "YA6Q3YuwHcpq6NvhdZeb25", "question": "What most recently happened to a person here?", "choices": ["photograph", "fell", "singing", "balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304234.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466472, "question_id": "YAKWzamAjmxppRu3fewQMn", "question": "What will he do with his hands?", "choices": ["clap them", "catch frisbee", "wave them", "lift them"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466472.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560315, "question_id": "YAVrsQKPynnhkHx6QicB6a", "question": "What kind of a shirt is the man wearing?", "choices": ["formal", "short sleeve", "long sleeve", "vest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97060, "question_id": "YAvUqq4ZGJAsGcbEVGeBXe", "question": "What type of art style is clock in the center of the station constructed in?", "choices": ["art deco", "modern", "brutalism", "art nouveau"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097060.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303353, "question_id": "YBsapmifJJQgpFxmGNPAC8", "question": "What kind of condition would this bear have if it were alive?", "choices": ["conjoined twins", "deafness", "obesity", "albinism"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000303353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265634, "question_id": "YDBUS2faX3dex67ouxxMzU", "question": "What part of the man is closest to the chain?", "choices": ["head", "leg", "hand", "bicep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265634.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 437192, "question_id": "YEK2uqaZVVxJ7o2RbcmSoi", "question": "Which direction are most of the eyes looking?", "choices": ["down", "left", "right", "up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000437192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 359252, "question_id": "YGPT3JafmiHvjHwtCWjPZv", "question": "What is the metal basket on the brick wall used for?", "choices": ["decoration", "storage", "basketball", "feeding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000359252.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550884, "question_id": "YGo9sJvheYXsouRSVZDRyn", "question": "Where does the horse breathe from?", "choices": ["mouth", "nose", "neck", "skin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550884.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391997, "question_id": "YJ8PRHLG4oPwKFeaufTFmQ", "question": "What are the people pictured above doing?", "choices": ["maneuvering", "playing", "skate boarding", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391997.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314258, "question_id": "YJoXGX5tnKe4b8Uz3eXarM", "question": "This company's distillery is located in what state?", "choices": ["washington", "michigan", "tennessee", "delaware"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314258.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91439, "question_id": "YLe9UyFKova7esw3yTn2A9", "question": "How many types of railway signals are there?", "choices": ["two", "five", "one", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091439.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409907, "question_id": "YMJmgbfw64QnktkTFasUx3", "question": "Where is the man surfing?", "choices": ["sand", "grass", "water", "pool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545451, "question_id": "YMLuZtCmFqooRpw3v4PxNS", "question": "The cord hanging from this board is meant to be tied where?", "choices": ["ankle", "neck", "wrist", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545451.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536253, "question_id": "YNSdGrfgFwSUDSuiKFJWNn", "question": "What type of bear shares a name with the skateboard?", "choices": ["polar", "grizzly", "black", "panda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536253.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517060, "question_id": "YNU4LhnhupCuConMjMvvmn", "question": "What limited liability abbreviation does the energy drink company use?", "choices": ["llc", "gmbh", "dba", "ltd"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517060.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182183, "question_id": "YQe4GByMhAmZ3hmGGvUb28", "question": "What would theoretically be the favorite cereal of this animal?", "choices": ["captain crunch", "trix", "count chocula", "honey combs"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000182183.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525892, "question_id": "YRvCJEJXCbJcgMrZVwubt8", "question": "What does the bird appear to have in its beak?", "choices": ["branch", "seed", "sunflower", "worm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8271, "question_id": "YSYy3hwv46Nyxp8CeBEV5m", "question": "How can one describe the colors on the bird?", "choices": ["cold", "dark", "vibrant", "dull"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008271.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557702, "question_id": "YSydfAtN6iBiPwDfBtopdW", "question": "What song speaks to the definition of the picture?", "choices": ["blurry", "blue", "crystal clear", "green river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557702.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342329, "question_id": "YVJavGfeCkGjAzwxDdWKZ8", "question": "That plate is most likely made of what material?", "choices": ["glass", "paper", "porcelain", "ceramic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5905, "question_id": "YVMRCtiTUtru9SyxatSzXv", "question": "What are the white granules present in the left silver container?", "choices": ["salt", "msg", "splenda", "sugar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005905.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 225221, "question_id": "YVWYbWiSojr7S4bSDvYjp6", "question": "What is this bird looking for in the sand?", "choices": ["electronics", "clothing", "car", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000225221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486106, "question_id": "YViGvtvvseChkgEH6qBPJu", "question": "What does the intend to do with the bird?", "choices": ["keep it", "eat it", "give owner", "sell it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486106.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557331, "question_id": "YVrbqPJKURb9xjx6bRypHM", "question": "What type of terrain or snow organization allows this person to be aloft?", "choices": ["pit", "flat", "upward slope", "ditch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557331.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50974, "question_id": "YVzbT72mgFJLofQZipiJ9n", "question": "These books would make great gifts for people who practice what hobby?", "choices": ["gardening", "golf", "chess", "crocheting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050974.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218871, "question_id": "YWwv6tQS95dMa6H59UsafL", "question": "The person here attempts to capture what?", "choices": ["fish", "criminals", "dogs", "flies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483600, "question_id": "YXjNZTuYsdrkiHko3WKtZk", "question": "Why would one sit at this table?", "choices": ["eat", "work", "file", "paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552770, "question_id": "Yar8NJwWtT4tryd3v3azdd", "question": "What has been used to cover the walls?", "choices": ["tapestry", "tile", "wallpaper", "paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552770.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482018, "question_id": "Yaz9nWtiHzkqKxjJ3daNd8", "question": "What makes the backs of these creatures appear white here?", "choices": ["dung", "moss", "dandruff", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482018.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521399, "question_id": "Ycg8hErm44KkEwy3qEJTAK", "question": "What is on the pizza?", "choices": ["red peppers", "spinach", "grilled chicken", "onions"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521399.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358937, "question_id": "YcioWkDmGndxjHCeLxyvL5", "question": "What here has the most unusual feature?", "choices": ["tile", "floor", "toilet", "plunger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 322778, "question_id": "YcuCAWQkszTo4nffYZDzWx", "question": "How many people are engaging in an activity or just recently engaged in an activity with their mouths here?", "choices": ["three", "zero", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000322778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109499, "question_id": "YfNqAexjWjmWHTgV4tEHN3", "question": "Who became president after him?", "choices": ["trump", "biden", "bush", "clinton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109499.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249342, "question_id": "YfQhQJYmKLuBEujMwUoBJw", "question": "What is the weather like in this image?", "choices": ["dust storm", "raining", "sunny", "snowing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000249342.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130308, "question_id": "YfeyqPg7EZ6pbsFbvem6Cd", "question": "What is the owner likely doing with the dog?", "choices": ["fetch", "obstacle course", "walk", "rope tug"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68831, "question_id": "YiX7D4TiVfsUyeyvunkGzc", "question": "What is the same color as the jacket the person is working?", "choices": ["lime", "plum", "orange", "carrot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068831.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171636, "question_id": "Yic35GANsBYCBa7DedxMQb", "question": "Which continent is this definitely NOT?", "choices": ["australasia", "south america", "europe", "antarctica"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171636.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554905, "question_id": "YioRARjYGbhTJgvuUapCcK", "question": "The man here has trouble with what?", "choices": ["betting", "defense", "drinking", "walking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000554905.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21505, "question_id": "YkPg7bkQ5gTARcexjuEidW", "question": "Where is the bench located?", "choices": ["on shore", "above water", "in water", "inside water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99175, "question_id": "YkfeoYtppEThN8skDCPqNB", "question": "What is the blue house logo made of?", "choices": ["cream", "cotton", "yarn", "denim"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099175.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406573, "question_id": "YnVzREYw7abmYy4uWfzceM", "question": "What human feature are the zebra stripes compared to?", "choices": ["fingerprints", "wrinkles", "skin", "hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000406573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293055, "question_id": "YnXxW3w7e4tZgrba5Gw5Ur", "question": "What is the pizza being presented on?", "choices": ["pizza pan", "lazy susan", "wooden block", "cake stand"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000293055.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147991, "question_id": "Yomu2vmoUToLz6gPe2LfBp", "question": "What is necessary to get this toothbrush to function properly?", "choices": ["battery", "mouth wash", "soap", "cord"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417666, "question_id": "YpQCfqPY2f92studQSqoPN", "question": "What type of body of water is past the beach where the hydrant is located?", "choices": ["fjord", "river", "sea", "lake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417666.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310624, "question_id": "YqQLtFQJC2VtD9JZywHdLi", "question": "What is usually found on top of the item that is on the right hand side?", "choices": ["mustard", "coconuts", "rice", "beans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310624.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20190, "question_id": "YrVBALcuRXmpunTZ3QrerU", "question": "What animal has similar coat colors to the one all the way to the left?", "choices": ["gazelle", "rooster", "hyena", "dalmatian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020190.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 571602, "question_id": "YrrdJ57GKd6fBpRPxYjaAL", "question": "What is the surface the cat is laying on?", "choices": ["sofa", "table", "floor", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000571602.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142292, "question_id": "YsFL7BJB2t35jrXTu3JehB", "question": "What kind of cooked French food is this?", "choices": ["croissant", "egg roll", "quiche", "tofu"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142292.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 225660, "question_id": "YsL3utmNS6QT7biki53sXy", "question": "The dog is tied to what?", "choices": ["pole", "bench", "fence", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000225660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7904, "question_id": "YsSWyAYUvk2Yhh2MUZjLsF", "question": "What kitchen appliance is seen?", "choices": ["microwave", "dishwasher", "stove", "fridge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250482, "question_id": "YsTs8m9wCGLHwAUMrTaiTp", "question": "The pizza here would NOT be eaten by whom?", "choices": ["carnivore", "dog", "vegan", "omnivore"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000250482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189840, "question_id": "YsY9LjnqwXnrWVb6FANovj", "question": "What kind of text is displayed on the bottom of the stop sign?", "choices": ["advertisement", "graffiti", "warning", "help wanted"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189840.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44731, "question_id": "Yt9Qni3hNsuxZVGxd2d2YT", "question": "What company is known for making the item the man has on his face?", "choices": ["nivea", "invisalign", "topps", "ray-ban"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044731.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409596, "question_id": "YtGJ5n3nbdNbadAbearLit", "question": "People might park on this road if it was closer to what?", "choices": ["park", "buildings", "liquor store", "houses"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409596.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277853, "question_id": "YtnCjZhzArTUcmP2amHwLS", "question": "What purpose do the chains here serve?", "choices": ["alarm system", "decorative flourish", "none", "retain lids"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71663, "question_id": "YuEhYhfK7GtuRUGbCZAvcS", "question": "What type of move is the snowboarder performing?", "choices": ["360", "grind", "illegal", "twisty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366228, "question_id": "Yutcu6V5HXEpyQpdmaHUVY", "question": "What must someone have before getting on this conveyance?", "choices": ["driver", "dinner", "ticket", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413140, "question_id": "YvwDQnc4zJReSnxwWfRhb8", "question": "What sort of creatures made the red mound shown here?", "choices": ["worms", "crows", "termites", "elephants"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000413140.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259358, "question_id": "YyU3Yo9z7r4Mcw8xjv55Ca", "question": "What area of the bus allows passengers to embark on top of it?", "choices": ["front right", "back left", "back right", "front left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17836, "question_id": "YywUnYnRT7gd2ub4ZrVhcr", "question": "What is usually found in this environment?", "choices": ["fish", "monkeys", "camels", "giraffes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017836.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263933, "question_id": "YzZRvee3DH6CZXv4fo4JXb", "question": "What is the woman most likely using the device to do?", "choices": ["race", "text", "eat", "draw"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000263933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577151, "question_id": "YznMsbtAqVFVe4NrvJX6DV", "question": "What is located to the left of the sign?", "choices": ["bowling ball", "traffic cop", "horse", "bridge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 217728, "question_id": "YzoPxFeeSmV8CSHn8j9mL6", "question": "What is the man on the white board trying to do in the water?", "choices": ["race", "dive", "ride wave", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000217728.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157620, "question_id": "Z3D6ZvDCPhDUtZJ6jZn3ke", "question": "How is this train powered?", "choices": ["steam", "coal", "battery", "electric"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157620.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508756, "question_id": "Z3u9KvKR2tdoVSHpWeGAZV", "question": "What type of area is shown?", "choices": ["tropical", "residential", "coastal", "commercial"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000508756.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575609, "question_id": "Z4VsEQdgRUXt2jDi9zonqE", "question": "Why is he holding the dog?", "choices": ["for sale", "keeping warm", "hiding", "showing off"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575609.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5980, "question_id": "Z5J9JN8eXZX2Yi6JTCPuzU", "question": "What is the name for the item on the right?", "choices": ["commode", "sink", "shower", "bidet"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000005980.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482980, "question_id": "Z5fumdLwMp7ppyLpPJsjbA", "question": "What is the tallest object in the area?", "choices": ["fence", "plants", "trees", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482980.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 14394, "question_id": "Z652BeDWhnVZMZwcMFd4k7", "question": "What would the wooden object be used for with relation to this food?", "choices": ["serving", "throwing", "chopping", "breaking up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000014394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466709, "question_id": "Z6GueYSxuW7SyEaqaJgmkx", "question": "Why is he squeezing the tube?", "choices": ["feels good", "wants mustard", "is broken", "heat hotdog"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000466709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192215, "question_id": "Z6SufhYSmxzmFg43bKmW7G", "question": "What type of light is being used?", "choices": ["screen", "moon", "lamp", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192215.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201586, "question_id": "Z6dTEKTY4yuhzrg5KVxbMw", "question": "Where does this city rank in terms of population among Canadian cities?", "choices": ["2nd", "1st", "5th", "3rd"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201586.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397912, "question_id": "Z77Te4NL5pLy4B8pkY7cHx", "question": "In which way is this bear like Mark Spitz?", "choices": ["it swims", "gold chains", "none", "great hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397912.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108790, "question_id": "Z7GAtN2R4ZRamkrJn8X8QX", "question": "The tennis court in front of the bleachers uses which type of surface material?", "choices": ["clay", "gravel", "asphalt", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 143220, "question_id": "Z9JWwtmSpzNV6wZiHic2Gw", "question": "Which one of these is a neighborhood in this borough?", "choices": ["francisco", "dalston", "piccadilly", "connecticut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000143220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439318, "question_id": "Z9UvhtEJK3FZZyURnp8XZK", "question": "The steering mechanism used on this board is called what?", "choices": ["fin", "wheel", "pivot", "axle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394347, "question_id": "Z9fapDJ2BDLcz5MQyxegJq", "question": "Where is the dog being allowed to walk?", "choices": ["in park", "in forest", "back yard", "on beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000394347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118464, "question_id": "ZAdxDZpEZycvAMvp79aGy9", "question": "Where is this cat located?", "choices": ["yard", "barn", "room", "vet"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000118464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402349, "question_id": "ZB2FHMojtGXTA4nTgFbQwN", "question": "What type of animal is naked on the green signs?", "choices": ["sheep", "fish", "insects", "bears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378505, "question_id": "ZBGWRYyQqci8Y9iBJdhnbF", "question": "What is present on the wooden block?", "choices": ["illusion", "nothing", "statue", "human"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222162, "question_id": "ZDCrNL2sTGGiu2x5RUErzZ", "question": "What will happen next?", "choices": ["eat food", "toss food", "cook food", "serve food"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000222162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487492, "question_id": "ZDTt7P6hkaUYgZaSpRVF8K", "question": "What is below the luggage?", "choices": ["grass", "feet", "violets", "playing cards"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487492.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281639, "question_id": "ZDV3KD6FoNrFbq5JW2wD7j", "question": "What is on the CD?", "choices": ["software", "music", "instructions", "games"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281639.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69636, "question_id": "ZDcL4cbkNU5AZJnCozQanM", "question": "What brand is the guys shirt?", "choices": ["nike", "target", "carhartt", "skater"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000069636.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387708, "question_id": "ZEMYftreHVDtU6irQPwsNY", "question": "What would be the best term for what the toilet is classified as?", "choices": ["repurposed", "de-serviced", "restored", "recycled"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414933, "question_id": "ZEeVwnoAk3MQcY5hJPfo69", "question": "These items will most likely be what?", "choices": ["burned", "dumped", "recycled", "displayed"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000414933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403588, "question_id": "ZEnM7xzG9wG9kTnqRNXs6h", "question": "What sound do people usually associate with this vehicle?", "choices": ["argh", "choo choo", "vroom", "whir"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403588.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363772, "question_id": "ZFCgN6QBzZubptRvbKxUgH", "question": "What is another name for the animals listed on the sign?", "choices": ["cameroon", "dorper", "bighorn", "merino"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363772.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286652, "question_id": "ZFkqHmTeMKvcTg96yZVL3B", "question": "Why is the zebra engaged in this activity?", "choices": ["hunger satisfying", "predator evading", "thirst quenching", "fish watching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286652.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 430726, "question_id": "ZG8HKo33txtnqZ5U8CRKxu", "question": "What type of entity recently stood near this dog?", "choices": ["dog", "person", "whale", "rat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000430726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387836, "question_id": "ZGb28JDKZUijgDjCbudcAJ", "question": "What is the giraffe looking up towards?", "choices": ["tree branches", "airplane", "helicopter", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387836.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22574, "question_id": "ZHdWVQW5o7BSQinXZy2muk", "question": "The car is sitting on what car brand?", "choices": ["porsche", "toyota", "bmw", "audi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022574.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379695, "question_id": "ZJoNTHF8tV6THi6vpxDcEo", "question": "In which city does the stop sign belong to?", "choices": ["toronto", "calgary", "vancouver", "winnipeg"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379695.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549863, "question_id": "ZKrKoEj9nHtes4Tf5i9uwV", "question": "What would most likely be found on the large brown item on top of the rug?", "choices": ["jaguar", "elephant", "artificial grass", "chess board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389891, "question_id": "ZMa83sGE7JnXukv9MeJPzb", "question": "How many towels are visible including those in mirror reflection?", "choices": ["two", "four", "six", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389891.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510238, "question_id": "ZMxrjLrQmLVqbJNrnbWuLu", "question": "How will they get rid of the bananas?", "choices": ["give away", "sell", "discard", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510238.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49467, "question_id": "ZN68K3XnTkdPjr5U6dcvTs", "question": "What is the healthiest item on the plate?", "choices": ["kiwi", "orange", "sausage", "pork rind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049467.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419629, "question_id": "ZNDKCcGaEZZme7LXYPyq5g", "question": "What emotion is the baby feeling?", "choices": ["fear", "indifference", "happiness", "sadness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000419629.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180566, "question_id": "ZNZuBpz43HUgrBrsoGyqfs", "question": "The wheels are most likely made of what material?", "choices": ["cloth", "glass", "wood", "polyurethane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139172, "question_id": "ZPC3siCHK74MkzQJnA6HLz", "question": "What is he holding?", "choices": ["skates", "poles", "snowboards", "skis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139172.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71469, "question_id": "ZR5tvq2jfqLrcLZr5QrKpG", "question": "What protective gear should this player wear?", "choices": ["gloves", "goggle", "knee pad", "helmet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352326, "question_id": "ZR62kK7HJhvVHtXTCV3isH", "question": "What network does this show air?", "choices": ["nbc", "fox", "netflix", "amazon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352326.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 58742, "question_id": "ZRaExkJHnXiXzr4TfArh3A", "question": "What is the average height of the fire hydrant?", "choices": ["6 feet", "5 feet", "2 feet", "3 feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000058742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501662, "question_id": "ZSiSo2qzqe29TERfibjwia", "question": "What animals are pictured?", "choices": ["chicken", "horse", "cow", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501662.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205337, "question_id": "ZTApneoBVGQxdyMLTV9tJW", "question": "The origin of the strawberry yogurt is from which country?", "choices": ["spain", "france", "poland", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205337.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193774, "question_id": "ZTfe3isxWjYd4ARNWhu3gN", "question": "What animal are these creatures said to be scared of?", "choices": ["mice", "snakes", "birds", "bats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193774.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 236621, "question_id": "ZTyLCbuVLQnskxpzC5cYmT", "question": "How can you tell the owner of this room could be getting ready to run?", "choices": ["cats", "bed", "sheets", "shoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000236621.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 408704, "question_id": "ZW7ZLGVncPHBU839aSArRD", "question": "What type of transportation is shown?", "choices": ["water", "road", "rail", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000408704.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 239024, "question_id": "ZWAoowujamXuYW6vMUbcBq", "question": "Which animal is in the lead?", "choices": ["adult elephant", "cheetah", "monkey", "baby elephant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000239024.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260079, "question_id": "ZWTrWhWcnSKVGfJzseZdKr", "question": "What would the tie be perpendicular to if it is extended up and down an extra two feet?", "choices": ["baseball cap", "beanie", "glasses", "eyepatch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87496, "question_id": "ZXMe55yokKHUomyxmXyZvv", "question": "What is near the frisbee?", "choices": ["bench", "lake", "dog", "cow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087496.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85736, "question_id": "ZYATZVqKHc7qDbz7tMoftK", "question": "What would be found in this climate?", "choices": ["polar bear", "banana", "seal", "walrus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085736.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21485, "question_id": "ZYgbYUbdNArRYQ8U7ssAXD", "question": "What is the man doing with the dog?", "choices": ["holding", "disciplining", "feeding", "selling it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177647, "question_id": "ZZ8nPWURBiLDkiAHdhyP4y", "question": "What type of company wanted to advertise on this bus?", "choices": ["cars", "electronics", "jewelry", "restaurant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177647.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459130, "question_id": "ZbWHZKNkUMErNiqjk8SMeV", "question": "What fuel do the cars that park here use besides gas?", "choices": ["ethanol", "electric", "hydrogen", "diesel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000459130.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527115, "question_id": "ZcSmPiLGYDScGAssRnZiX2", "question": "Why is the person wearing goggles on his head?", "choices": ["is blind", "fashion", "help vision", "eye protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527115.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367878, "question_id": "ZccvwHXmrLPwjDMEc6c8Xj", "question": "What materials are the above teddy bears made of?", "choices": ["synthetic fur", "nylon", "mohair", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367878.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100876, "question_id": "ZdYkwXNR5Kogwsc9iDRBji", "question": "What kind of animal would fit through the fence here?", "choices": ["zebra", "giraffe", "elephant", "human child"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000100876.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162812, "question_id": "ZdeAjgcC8NqbvMGg9W9CES", "question": "What country is this bus from?", "choices": ["france", "germany", "england", "denmark"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162812.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432391, "question_id": "Zg9oWZd9SnNyMcX4XMsPrB", "question": "The white item under the window is usually sold by the what?", "choices": ["bushel", "roll", "thousands", "pound"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432391.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59500, "question_id": "ZgDmPGMNCEU3ddMBTJemMV", "question": "Which action is the surfer taking?", "choices": ["jumping", "turning", "falling", "reversing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059500.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229853, "question_id": "ZgxDB5Auz8diMfd5vkjHi9", "question": "How many signs are red?", "choices": ["two", "one", "four", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495977, "question_id": "ZhDPXFG2umv2JpovWTKZyV", "question": "How many animals are sitting in the grass?", "choices": ["four", "six", "one", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399381, "question_id": "ZhSxD6GowNVxtF3HfijdCZ", "question": "In which environment does the animal with white fur thrive the most?", "choices": ["polar", "south american", "tropical", "equatorial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399381.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190704, "question_id": "ZhTGeFRXDbcuGhmpMRpTYb", "question": "What is the fabric of the suit?", "choices": ["silk", "corduroy", "wool", "linen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190704.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381817, "question_id": "ZhUp2FkK9nX9NoUgr7Mo76", "question": "What is this player ready to do?", "choices": ["dribble", "shoot", "dunk", "swing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381817.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404993, "question_id": "ZhVHsxrVYQcNVKFPnn69ox", "question": "How many amino acids in Cow's milk?", "choices": ["nine", "five", "eight", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442920, "question_id": "ZhcZ9FUVZ7o8Ps8vvTRmbp", "question": "What body part is this person trying to get a look at?", "choices": ["watch", "back", "head top", "forearm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442920.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413911, "question_id": "ZjVZe7UqX3wW8V7SYUh9Lm", "question": "How many people are sitting in the row of the bus the photographer is seated in?", "choices": ["none", "one", "five", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413911.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350049, "question_id": "Zk2rYAZEJi493gtSZLxTHK", "question": "What is the birth name of his favorite super hero?", "choices": ["bruce wayne", "clark kent", "tony stark", "peter parker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350049.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358261, "question_id": "Zk5XoYbwex3vdocazNpyDm", "question": "What colour is the girl's hat on the right?", "choices": ["green", "black", "blue", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 300524, "question_id": "ZkFYnmQPaBMk2hLfUWLZBu", "question": "What is the vase wrapped in?", "choices": ["bed sheet", "label", "plastic", "crochet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000300524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 327400, "question_id": "ZkXg85GT7gyCutYS8znqZR", "question": "What is the large black rectangular object near the bookcase used for?", "choices": ["painting", "storage", "cooking", "watching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000327400.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539192, "question_id": "Zq3GdHjhokEcBzTZMfWVQT", "question": "What is the cat's claw touching?", "choices": ["bench", "television", "frog", "foot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 217443, "question_id": "Zqg7sWeX5u87En8RZTA8Xh", "question": "What color curtains would one close to get things darker in this room?", "choices": ["silver", "black", "pink", "brown"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000217443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454189, "question_id": "Zr8t78QmAgbdwnFLW7QtYJ", "question": "What is the long part of the animal?", "choices": ["tail", "arm", "trunk", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454189.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462476, "question_id": "ZrHJhPfZLZ3jFd9bbd6dis", "question": "The apples are meant to represent what?", "choices": ["mouth", "eyes", "toes", "nose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97060, "question_id": "ZrjUMDUDWUxvFdSTdnActh", "question": "What flag can be seen hanging from the wall?", "choices": ["venezuela", "united states", "mexico", "france"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097060.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 244686, "question_id": "Zrt9grKzHDC7UMGG3m3jnq", "question": "Who owns this clock?", "choices": ["sun", "age", "mitsubishi electric", "orioles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000244686.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350011, "question_id": "ZsL4hmybCEc5MNFd36fGVM", "question": "What is the name of the game?", "choices": ["skiing", "snow boarding", "baseball", "skating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350011.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85667, "question_id": "ZsSfQMEE8txjoD74xvvUht", "question": "Why is the trunk curled?", "choices": ["eating", "lifting something", "drinking", "trumpeting"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000085667.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 113423, "question_id": "Zt8fFzEkxEu3EYMdXsNZ8Z", "question": "In which country is this aircraft museum located?", "choices": ["switzerland", "denmark", "united states", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000113423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387775, "question_id": "ZuAYibpzubceBzPgJXvFGZ", "question": "What type of paper might you find here?", "choices": ["toilet", "typing", "parchment", "tracing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387775.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378141, "question_id": "ZuZE2b7cw6rf8ALQxvpGoc", "question": "What is high in the sky?", "choices": ["bird", "zeppelin", "snowboarder", "pie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51635, "question_id": "ZufpKRUowwjkmAUVgXY2ej", "question": "What fruit family is this street named after?", "choices": ["stone", "melon", "citrus", "berry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051635.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257902, "question_id": "ZvLBpWUc2vWwVxeZC2Tt49", "question": "What is most likely written on the tags?", "choices": ["warning", "poem", "cost", "traveler name"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257902.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40104, "question_id": "ZxSjYyHJ52TZdQPjJpCkXn", "question": "In which direction is the number 44 plane going presently here?", "choices": ["down", "up", "none", "backwards"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15348, "question_id": "ZxdjQH4N9yfKavcc5qLiRj", "question": "What would this type of plane be called in to action for?", "choices": ["transport", "bombing", "fueling", "air defense"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29429, "question_id": "ZxhZCSqcQpYBYaAsUCcuAP", "question": "The second item that they sell measures what?", "choices": ["temperature", "humidity", "altitude", "air pressure"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117225, "question_id": "ZyTAccYCskHZ2UTf55dJHS", "question": "The bird here is most at home where?", "choices": ["parks", "water", "desert", "mountains"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000117225.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156750, "question_id": "ZycsRDqzC37HTm75dxK9HG", "question": "In which location is conditioned air blown from here?", "choices": ["stool", "under couch", "table", "above window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156750.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15088, "question_id": "a2snV4wHzBvXDtkzQd5abk", "question": "How many hands are used for this device?", "choices": ["three", "one", "two", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015088.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308413, "question_id": "a4jDfQQ7cDR9WFi3ULBEDu", "question": "The people are using what type of public transportation?", "choices": ["trolley", "shuttle", "bus", "light rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308413.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 340527, "question_id": "a5HsRozPdMEsA7Us8VHrgQ", "question": "What is the little kid in the background sitting in?", "choices": ["highchair", "booster chair", "booth", "rocking chair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000340527.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 185224, "question_id": "a5QTYrhwSSjzC7ZNRHhHdz", "question": "What are these types of signs called?", "choices": ["warning", "brand", "street", "historical marker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000185224.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17085, "question_id": "a6J7sSLvaX7qYGHLfs4B4D", "question": "What is the zebra doing that the cow is not?", "choices": ["galloping", "grazing", "mating", "mounting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567848, "question_id": "a7DFC3VWJpqVJuwkJrxbsc", "question": "Why is there a bulge on his leg?", "choices": ["tumor", "muscle", "swollen", "prosthetic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567848.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473535, "question_id": "a7pyCdg5e3Nj9SKmd2rqNq", "question": "What type of transportation is shown?", "choices": ["rail", "air", "water", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369196, "question_id": "a8AHihoozu8MCgkAkv5SX6", "question": "Based on his clothing what level of racer is the man on the motorcycle?", "choices": ["professional", "beginner", "semi pro", "amateur"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369196.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338462, "question_id": "a8NH2ZYrPVuLMz9URfFY9Z", "question": "What hour does the clock show?", "choices": ["seven", "three", "six", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000338462.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309837, "question_id": "a9EyY5uc2sgKcRXQ8ohi7Q", "question": "What device does this object work with?", "choices": ["television", "car", "air conditioner", "computer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36172, "question_id": "a9GoKNmgVyAf6KqQGhwiap", "question": "What might be stored in the smoky plastic compartment seen here?", "choices": ["flour", "lettuce", "butter", "cinnamon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036172.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417289, "question_id": "a9itMWdPYo2H36vG59fkZ4", "question": "What will this man do next?", "choices": ["surf", "swim", "shower", "play polo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301511, "question_id": "aA9Bt42TZXjaKcLPotfNpg", "question": "What type of coat coloration does the cat leaning on the chair have?", "choices": ["pointed", "tabby", "calico", "tuxedo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152083, "question_id": "aAeZxKcWNTjvUAGzbMRhiC", "question": "What object is being used to clean the mess here?", "choices": ["cloth", "disinfectant", "toilet paper", "towel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152083.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211934, "question_id": "aBQkbTcm9osvv5Wmm29Rzm", "question": "What is likely housed inside the upper structure?", "choices": ["firearms", "clothing", "bell", "books"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211934.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490273, "question_id": "aBYgBinyj7YAr2w7Faiea4", "question": "What kind of building is likely to be in the above picture?", "choices": ["church", "museum", "tower", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490273.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107755, "question_id": "aBpusYUwgCRxLeyp3w94vB", "question": "Where does this bird find most of its food?", "choices": ["sand", "trees", "rocks", "air"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000107755.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507291, "question_id": "aCdv7Z6uAc6XHxHgxwMwBb", "question": "What is the big red thing on top of?", "choices": ["tracks", "tree", "box", "cloud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507291.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356966, "question_id": "aCmx5aCHZ4cAAbdRFk6aqE", "question": "What is the floor of this wash room made of?", "choices": ["wood", "carpet", "none", "tiles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356966.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 388181, "question_id": "aCoj5VdiwPrvrud7QqMCry", "question": "How would someone most likely dress in this setting?", "choices": ["no clothes", "shorts", "bright colors", "warm clothes"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000388181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98797, "question_id": "aDusw9q9ACjkbiLBSjfWQ5", "question": "What does the person in the air have on their head?", "choices": ["crown", "feathers", "hat", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098797.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 113194, "question_id": "aE7DY2mBNYbvkJofTGqyiu", "question": "How many legs do the entities here who will stay the driest have?", "choices": ["four", "two", "six", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000113194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103147, "question_id": "aEo4M7iavDn9tvF33kXenR", "question": "What part of a surfboard is this person grabbing with their hands?", "choices": ["ding", "tail", "rails", "nose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103147.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178857, "question_id": "aFBWjZkqvPGy2R36TRZhMw", "question": "What is the floor made of?", "choices": ["tiles", "wood", "cement", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178857.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 455457, "question_id": "aFgB9kTpkjKSsCVnPmQten", "question": "What feature can be seen on these animals?", "choices": ["horns", "wings", "pouches", "talons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000455457.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363772, "question_id": "aG2dJRvvQZNo5HsqfZtHsF", "question": "What kind of danger is likely faced by the animals?", "choices": ["earthquake", "tornado", "car accident", "landslide"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363772.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537285, "question_id": "aG93fkMhN6d8mktaScy6oY", "question": "What object attached to the surfboard would keep it from getting lost?", "choices": ["fin", "rutter", "tether", "wax"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416713, "question_id": "aGnepsqpBFhgykVsZkVK3F", "question": "What would one google if one wanted to buy some of these items?", "choices": ["cutlery", "tchotchkes", "haberdashery", "memorabilia"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000416713.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178234, "question_id": "aHJAJzZCnw6E2ppdMumJca", "question": "What is required for this activity?", "choices": ["snow", "water", "ice", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178234.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 241298, "question_id": "aHNiF87FMuWLxuR6BYYPtQ", "question": "What is below the taco shell?", "choices": ["corn chips", "bananas", "eggs", "bacon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000241298.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 128688, "question_id": "aJ28xdbQRqm9XP8UvKwynk", "question": "These animals are known for what?", "choices": ["wool", "spikes", "tusks", "quills"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000128688.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 185192, "question_id": "aJHFriHLrnX9BszWk2MQRf", "question": "What plug type would go into a socket on the wall?", "choices": ["type", "type w", "type omega", "type z"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000185192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432220, "question_id": "aKt8NA5t77HBKXESjeQ5NP", "question": "What company specializes in making the large white item?", "choices": ["ibm", "mcdonald's", "dell", "kohler"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311136, "question_id": "aLoTkkQSQN99ct45eVLedp", "question": "What style was this tower constructed in?", "choices": ["modern", "post modern", "gothic", "victorian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311136.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457853, "question_id": "aLuRYVAWJxke7Kk7aGRuth", "question": "Based on deductive reasoning what time is it right now?", "choices": ["1215 pm", "1215 am", "1115 pm", "1115 am"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257645, "question_id": "aMCD7ZTQeiSaQeT7md4V3M", "question": "Why does the animal stay there without leaving?", "choices": ["obedient", "tied up", "lame", "hungry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257645.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205127, "question_id": "aN3ULy9yq99NDz4Eg2TXJb", "question": "What type of location is under these street signs?", "choices": ["parking", "highway", "intersection", "bridge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15491, "question_id": "aN6ACbtTpEGHEyieEGcxag", "question": "What color paint should they get if they want to eliminate the graffiti from the truck?", "choices": ["purple", "white", "red", "blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015491.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576217, "question_id": "aQiPjrTPhrXSLTM3yabVCp", "question": "Why is the bird here?", "choices": ["foraging", "exercise", "resting", "lost"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576217.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 140091, "question_id": "aQjA4CdPoinNwNwEkwutA2", "question": "What activity is the woman engaging in?", "choices": ["dodging", "catching", "clapping", "throwing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000140091.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103177, "question_id": "aR3PtXPGuGKcfKsrMfpJKw", "question": "What trick is being performed?", "choices": ["grinding", "ollie", "nose grab", "melon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317776, "question_id": "aSToJik2CYFutLM7CLrZCS", "question": "Red wine is made from which fruit?", "choices": ["citrus", "grapes", "apple", "passion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317776.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328004, "question_id": "aSvGu35y56gxCSaAk4afSJ", "question": "How would the red white and blue item have to be modified to operate on land?", "choices": ["add length", "bigger engine", "add wheels", "remove weight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328004.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387968, "question_id": "aTDo7hXJYMTxgTjPHNt5eX", "question": "Why is the toilet sitting there?", "choices": ["throwing out", "installation", "planter", "decor"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000387968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276625, "question_id": "aTXR6mfpyywyuCSCavsXtY", "question": "Why is the man wearing a suit?", "choices": ["costume", "uniform", "warmth", "dress code"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276625.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550715, "question_id": "aVsxLiXP4rtRSrZNMQvAcs", "question": "How are they getting downhill?", "choices": ["skates", "sled", "skis", "snowboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550715.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115154, "question_id": "aWBZcHnA4VksUqAL2HFL7W", "question": "This animal produces which one of these liquids?", "choices": ["beer", "soda", "milk", "vinegar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481993, "question_id": "aWT4nTqUXLqTcGa5N2ue93", "question": "The man is likely doing what?", "choices": ["traveling", "eating", "worshipping", "exercising"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356267, "question_id": "aWbKpLZPWzhVxgs6JpZGjy", "question": "Why is he upside down?", "choices": ["is falling", "showing off", "distracted", "is resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8537, "question_id": "aWhGdonfuv8PADHKJTbLPQ", "question": "What might a lady put down in here?", "choices": ["soda", "toilet paper", "water", "seat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008537.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396079, "question_id": "aWit2tcub8o3ieZsFfKMiV", "question": "What is the highest mountain on this continent?", "choices": ["washington", "kilimanjaro", "mauna kea", "mckinley"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349257, "question_id": "aXHjVtgx5kJ8NoeMFgpVLy", "question": "What is the red wall on the left made from?", "choices": ["bricks", "paint", "wood", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 128, "question_id": "aXUvdYXSnUZciQukvMjBJC", "question": "What day is it?", "choices": ["elephant day", "monday", "tuesday", "weekday"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59800, "question_id": "aY8xtkjzzB29kr28cfd4wv", "question": "What shape is this sign?", "choices": ["rhombus", "square", "circle", "hexagon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059800.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22516, "question_id": "aZxDkAujjf5cUH4TgcCpvz", "question": "What feature do these animals have?", "choices": ["trunk", "talons", "wings", "gills"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022516.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59773, "question_id": "aa7nmu2EYy5w3iPRZBfbfL", "question": "Where is the car most likely going with a surfboard in the back?", "choices": ["beach", "movies", "river", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059773.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196823, "question_id": "aaiF6wWFS67b7QdKLnZvog", "question": "The seeds on the outside of this fruit are what?", "choices": ["disease", "ovaries", "stems", "sprinkles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382474, "question_id": "abeN2x9aYmnaKvVhtqjuoj", "question": "Where is the penny from?", "choices": ["united states", "eurozone", "canada", "england"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573866, "question_id": "abmTT3SFbCTxPmcjzFKDHv", "question": "What sport is this person just finished playing?", "choices": ["basketball", "rugby", "baseball", "tennis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000573866.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54937, "question_id": "ac2GmtpGg9VFTwxiK4rK79", "question": "Which animal is most likely to be seen from a distance?", "choices": ["left zebra", "giraffe", "right zebra", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262913, "question_id": "acQcNTQqJSFiMHYmhTekd3", "question": "What type of sink is this called?", "choices": ["pedestal sink", "farmhouse", "undermount", "drop-in"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262913.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154548, "question_id": "acVddHzt27Y2nYDpqjQwpd", "question": "How was the red mark applied?", "choices": ["spray", "brush", "pen", "finger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154548.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157580, "question_id": "adjwBpXC5sguLH44T2AATT", "question": "What style sking does this person enjoy here?", "choices": ["alpine", "barefoot", "none", "cross country"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199611, "question_id": "advutmYkJoHab5Ejwzx7qb", "question": "How is this train powered?", "choices": ["electricity", "diesel", "coal", "steam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199611.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91551, "question_id": "aewjDyNanSXnV2tfu4Y9nj", "question": "What language is seen here?", "choices": ["canadian", "dutch", "german", "spanish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133856, "question_id": "afRjjAhoYwnHqxuqskkNDQ", "question": "What is the main color of the large bird?", "choices": ["purple", "red", "gray", "blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133856.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442851, "question_id": "agLvY4Yqy3wisS9WYpKo8A", "question": "These are probably being cooked in what liquid?", "choices": ["broth", "water", "cream", "oil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442851.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368260, "question_id": "agy8zBbJTJ6aLeoJxxSHMv", "question": "How did the cat get up there?", "choices": ["climbed", "master", "fell", "ladder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368260.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463714, "question_id": "ahtGz8oDRCDYzxasQByCba", "question": "What country does this plane represent?", "choices": ["england", "mexico", "canada", "usa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387260, "question_id": "aiG6ymZBnGv6aoES3YyKVz", "question": "What is an action this animal can complete?", "choices": ["sting", "fly", "gore", "type"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387260.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532122, "question_id": "aiXpkfEdrEpJNUHEfzxrzH", "question": "What setting is the animal in?", "choices": ["grassy area", "city street", "tundra", "desert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532122.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553423, "question_id": "aixzQeayhY3ExWbLrTzztW", "question": "What bathroom fixture contains the most germs?", "choices": ["hand dryer", "mirror", "faucet", "bathroom sink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424311, "question_id": "aj38ZDvRufgUueFTcBbqs9", "question": "The bench here is oriented to watch what?", "choices": ["ocean", "clouds", "nothing", "mountains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424311.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90624, "question_id": "ajKNhYTJ8HS2K72TSFrTcn", "question": "What part of the board is likely to hit the ground next?", "choices": ["side", "none", "wheels", "end"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090624.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139470, "question_id": "akzHsNNEwk4BkBY7S4WTCQ", "question": "As he is entering the area what is he said to be doing?", "choices": ["dropping in", "falling in", "heading in", "going in"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139470.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11864, "question_id": "aoFkhrSdswiKhzoABNBUNq", "question": "What time will it be on the next hour?", "choices": ["800", "300", "1000", "400"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000011864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38457, "question_id": "aoJLLNkYrZv8ZSRz2MmGZu", "question": "What type of outfit is the woman wearing?", "choices": ["dress", "bikini", "skirt", "shorts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038457.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85524, "question_id": "aoYAMLyyQ3kmNVawWXMzf5", "question": "What is in the glass to the left?", "choices": ["dice", "candy", "flowers", "liquid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557937, "question_id": "apWtnaRGdYJcY3S2xbK4kp", "question": "To what direction is the sun located with respect to the people?", "choices": ["left", "front", "right", "back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505036, "question_id": "aqATQgjy2ZvPGWtiehCeNR", "question": "The design on the board looks like what part of the human anatomy?", "choices": ["hand", "face", "feet", "lips"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 70742, "question_id": "aqRHRaGqv2DCSTU8G8SEQ4", "question": "What is he doing with the plate?", "choices": ["eating it", "cleaning it", "discarding it", "displaying it"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000070742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337243, "question_id": "arLZCB89VtqQKiKRgYXrzF", "question": "What could be a possible obstacle to the bear in crossing the field?", "choices": ["trees", "logs", "flowers", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104059, "question_id": "arwmEi52PrzX7TX8PR5Gn3", "question": "What does this large item run on?", "choices": ["water power", "batteries", "fuel", "solar power"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371386, "question_id": "as56sWzW5LPmC4iamTWGiz", "question": "What city was this picture taken in?", "choices": ["london", "new york", "los angeles", "chicago"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371386.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224631, "question_id": "asFCbdJ7dnMNnjfyXg3SHt", "question": "What emotion is the teddy bear expressing?", "choices": ["anger", "happiness", "sadness", "boredom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458994, "question_id": "au4av9c66mFmSz9pAohqF9", "question": "What type of shoe is the woman wearing?", "choices": ["sandal", "flat", "boot", "clog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458994.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127738, "question_id": "audWx4DwpxUXMejgY8vV7J", "question": "What is this action called?", "choices": ["sleeping", "balancing", "eating", "walking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127738.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177656, "question_id": "av9vukDVZa8Wy2RMjPPD5h", "question": "What type of business is listed on the back of the truck?", "choices": ["clothing", "commercial bakery", "dairy", "escort service"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227998, "question_id": "awXbxJem4fZ8PzqTXSXZTC", "question": "Why is there an awning above the clock?", "choices": ["block sun", "for sale", "prevents vandalism", "protect clock"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000227998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173419, "question_id": "ax2Rou6fTuwEDZGwsyHAdt", "question": "What is the snowboarder likely looking at?", "choices": ["ground", "photographer", "falling goggles", "snowboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221074, "question_id": "b3nL7gTu7ccgo34UtkyG8b", "question": "What is the best Frisbee dog breed?", "choices": ["labrador", "poodles", "retriever", "bull dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221074.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368433, "question_id": "b3yAYRsRnttWibDCdFnoV7", "question": "What prevents the water in this device from freezing too?", "choices": ["heated", "underground", "blankets", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507722, "question_id": "b4VTAjBoUx7BoSpbUMZDoB", "question": "What type numbers do most clocks here have?", "choices": ["arabic", "cartoon", "roman", "none"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000507722.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27196, "question_id": "b4y7pssPMWL3XVxNjYgmBF", "question": "What has caused the stone on the ground to become reflective?", "choices": ["flooding", "water balloons", "glaze", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000027196.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448248, "question_id": "b5BmJBo8qrj6GRU2R4jVwa", "question": "What direction will the wings likely go next?", "choices": ["up", "right", "down", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448248.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160605, "question_id": "b5R9CynFUxFjtDWPzrU2m6", "question": "Which part of these animals is the most precious?", "choices": ["skin", "ears", "tusks", "nose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160605.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35892, "question_id": "b5YRaSyqbSZs7VFrnyMVY7", "question": "The child wears a tag that allows them to do what?", "choices": ["sing", "ride lift", "make snowballs", "see movie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130057, "question_id": "b5opRkiNNC7XDjkNaxapSr", "question": "Where are the cows?", "choices": ["cornfield", "yard", "show", "pasture"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367677, "question_id": "b7PMcK7vJt2nTrHmhAnVue", "question": "In which country does this bus take on passengers first?", "choices": ["south africa", "holland", "germany", "france"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367677.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95794, "question_id": "b7zyiGYo83E6xufSauqnQL", "question": "What is leaning against the toilet?", "choices": ["girl", "brush", "chair", "vacuum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374932, "question_id": "b8oeKoHHejh79ZmHas7StG", "question": "What is this animal used for?", "choices": ["boots", "tusks", "silk", "wool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 421262, "question_id": "b8wfhvezXUgLZzhavSZM3V", "question": "What are the gray walls of the building made from?", "choices": ["glass", "wood", "steel", "stone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000421262.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365585, "question_id": "bAspKcJGQ29AT56TCQ6JeV", "question": "She is prepared for a what?", "choices": ["job", "trip", "prison sentence", "sleepover"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365585.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369218, "question_id": "bCyJpNfXtoJgjZQy8p8VqY", "question": "This bear lives in a wood of how many acres?", "choices": ["30", "100", "200", "50"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499288, "question_id": "bDmKA35gCMZ5byEWEtU3iC", "question": "What was the white shiny area around the bathtub made from?", "choices": ["sand", "wallpaper", "tile", "paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499288.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302542, "question_id": "bDrTyhwMDZyoLSfox9y82G", "question": "What is one item in the bathroom that is already taken care of?", "choices": ["toilet handle", "mirrors", "faucet", "soap dispenser"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 564279, "question_id": "bDwwQxubcuukDqvQhxMepw", "question": "What factor makes this bus greenest?", "choices": ["fuel utilized", "passengers", "nothing", "wipers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000564279.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417920, "question_id": "bFGtzy9TWc5bgbBB4At99v", "question": "What kind of clothing is the person wearing?", "choices": ["swimming costumes", "official", "casual", "semi-casual"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417920.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260498, "question_id": "bGUQZ7iuSvp9JHRY66943a", "question": "What can be said about the woman and her equipment?", "choices": ["both underwater", "different colors", "matching colors", "same size"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000260498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450051, "question_id": "bHkNSScptN6pfqw98YDjz7", "question": "What is the largest item here?", "choices": ["cave", "gorilla", "bell", "wave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000450051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197926, "question_id": "bJ7pigLGakDLxfM3LtBhGX", "question": "What is the woman wearing while she is brushing her teeth?", "choices": ["tiara", "hairnet", "mask", "wedding dress"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197926.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432241, "question_id": "bJpFY2YUs98wu9YnZKzTHn", "question": "Which company makes this model of plane?", "choices": ["boeing", "united airlines", "jetblue", "airbus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86387, "question_id": "bLeEMG5xjqfRmLuy2Yqwdi", "question": "What is near the white lines?", "choices": ["cattle", "clown", "car", "baseball player"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454352, "question_id": "bM77DcPdtRPjgmXwDd5jSQ", "question": "What feature does this animal have?", "choices": ["spikes", "gills", "wings", "claws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454352.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507932, "question_id": "bM8eAFC9ydH2kxsHgC5Fet", "question": "What bodily function is portrayed here?", "choices": ["urination", "defecation", "drinking", "vomiting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 60660, "question_id": "bMBXrWQe6AeppJweWhWG26", "question": "How did the dead bird get into the bathroom?", "choices": ["flew", "human", "cat", "walked"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000060660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154162, "question_id": "bMnCRyLyPLrMu7DNUoXE2s", "question": "What crime could occur here outside the building based strictly on what is visible?", "choices": ["robbery", "theft", "extortion", "piracy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556347, "question_id": "bMpNximVMrR7vKggqk3PqR", "question": "What is the purpose of the rod sticking upward from this item?", "choices": ["visibility", "anti-theft", "opening", "spraying"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000556347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161335, "question_id": "bNZoQzaFQ7og5z9GJezcUy", "question": "What has paint dripped and written on it here?", "choices": ["light", "bed", "mirror", "cabinet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161335.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98384, "question_id": "bPXPd69PBkRp55xT8i3BeD", "question": "What is the rectangular object on the floor for?", "choices": ["drainage", "heat", "music", "air conditioning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098384.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574821, "question_id": "bSewZ7bdesRALdWHP8QffN", "question": "Why is the napkin here?", "choices": ["decoration", "clean plate", "protect plate", "grab sandwich"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000574821.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157082, "question_id": "bVBrhC2EAjLsryY8QQE28X", "question": "What kind of signs are shown?", "choices": ["promotional", "traffic", "brand", "warning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157082.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 132989, "question_id": "bWQtCXbHRagLjwSvgguBP7", "question": "What color is the table underneath the mouse?", "choices": ["white", "orange", "yellow", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000132989.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192673, "question_id": "bWUE2kCmWi96xQLBoVVjoz", "question": "What are they doing?", "choices": ["video game", "channel surfing", "cleaning remote", "controlling robot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192673.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308439, "question_id": "bWacsbA5d4WVFRqzEz4uCK", "question": "What are the people sitting on?", "choices": ["plane", "train", "bus", "boat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308439.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252777, "question_id": "bYNKMG5iYgbGKhp7VyPJgv", "question": "What is the most likely thing under the blanket?", "choices": ["pillow", "legs", "stomach", "books"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275803, "question_id": "bYg7UbtaiV7aRwCd9Vvfuy", "question": "What is the size of this keyboard compared to other computer keyboards?", "choices": ["bigger", "smaller", "much bigger", "same size"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275803.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485347, "question_id": "bZGYS55GaNdzxDZibJkjMa", "question": "How many more of these animals are needed to make a dozen?", "choices": ["two", "ten", "six", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411569, "question_id": "bZKf9QapbfJftjuhPQELKW", "question": "What is truly the tallest object in this area?", "choices": ["hills", "elephant", "giraffe", "trees"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411569.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135452, "question_id": "bZmuvHvTUaVWejWz6sJ5o6", "question": "What is the weather condition for the man wearing the straw hat?", "choices": ["snowy", "windy", "foggy", "rainy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90688, "question_id": "baRvyiuKKtPQK8WB5ogDyF", "question": "What type of furniture do these items belong on?", "choices": ["bed", "desk", "sofa", "chest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090688.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445463, "question_id": "baWfYBmY4By4zHTVFkpPXA", "question": "How many rail carriages are in this company's fleet?", "choices": ["22", "35", "six", "14"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000445463.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 509184, "question_id": "bb65yspSbV66WMdDcuTjH5", "question": "Which giraffe most likely eats more food?", "choices": ["third giraffe", "neither", "right giraffe", "left giraffe"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000509184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518414, "question_id": "bbetHPomdYLLAK7M6x7KqD", "question": "What type of show is the cat watching?", "choices": ["comedy", "nature", "drama", "cartoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352578, "question_id": "bbqgKjCKGgAPUVpB4XkSXr", "question": "What sort of icon is this person wearing?", "choices": ["hair clip", "phone", "none", "cross"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401689, "question_id": "bdQkfiUjqmRHCnbDwf7y4k", "question": "If most of these cattle were thirsty from where would they drink?", "choices": ["no where", "barn bar", "stream", "dewy grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401689.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162766, "question_id": "bdpGjEnzZXChDKYV9guovb", "question": "The man in the bedroom just finished doing what activity?", "choices": ["ironing", "eating", "sleeping", "bathing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289086, "question_id": "bdvtJuhwHCq4yDm6frrieX", "question": "What is the young girl doing on the pillow?", "choices": ["watching tv", "sleeping", "playing", "exercising"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289086.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443219, "question_id": "bf94Liae4rKtWdLYwAKHh3", "question": "Why is the child under the bench?", "choices": ["eating", "shade", "birdwatching", "hiding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443219.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434218, "question_id": "bfnHYiKDHCyDPWy94prAjH", "question": "Which one of these does the money company also specialize in?", "choices": ["travel", "websites", "appliances", "clothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57243, "question_id": "bfyQ9ecT88brVmcKve4T8L", "question": "How is the television receiving the broadcast of the soccer game?", "choices": ["cable tv", "antenna", "online streaming", "vhs recording"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000057243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147479, "question_id": "bgL7jeg2Qqckdjbg9EyeXv", "question": "What is usually found in this room?", "choices": ["cold cuts", "toilet paper", "television", "books"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558952, "question_id": "bgQeDdateurm2PJLDoPRSN", "question": "Where are these containers located?", "choices": ["desk", "office", "kitchen", "museum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121177, "question_id": "bhdtA9jvAGpb9EhLiQzV5Q", "question": "What usually goes into the item in the far left corner?", "choices": ["grilled chicken", "leaves", "waste", "paintings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171531, "question_id": "bhhu8Gx2Die4mS5Efb7GDf", "question": "What makes this place good for feeding these animals?", "choices": ["long grass", "sloped area", "fenced area", "shaded area"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171531.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381535, "question_id": "bhpdc5CM2bFvbxvXCMhgsE", "question": "Artwork on this bus is inspired by the writings of whom?", "choices": ["dickens", "potter", "suess", "rowling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283802, "question_id": "bhv64VezCQ6gubgeaW7Vke", "question": "What is this dog trying to do?", "choices": ["eat", "sleep", "run", "watch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400714, "question_id": "bidA6YFJzmDXzxWBTfoSSK", "question": "What primary colors can be mixed to make the color of the boy's shirt?", "choices": ["red blue", "red white", "red yellow", "blue yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301666, "question_id": "bijQ5izvjzqB8xpNm2wLTB", "question": "In which country is this yellow train engine located?", "choices": ["england", "australia", "canada", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301666.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505036, "question_id": "bjCSsYRg6nDdvKb72DNuZi", "question": "How is this possible?", "choices": ["kid fell", "brilliant child", "fast learner", "photoshop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 509343, "question_id": "bjGifwdYvqpatD3sY672T6", "question": "What skateboard move is the man performing?", "choices": ["grind", "reverse", "kickflip", "ollie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000509343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451045, "question_id": "bjGsdGhyZ8BEvqWzs4x5fY", "question": "This reminds of what Christmas Carol Character?", "choices": ["grinch", "rudolph", "santa", "frosty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451045.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173765, "question_id": "bjhK899FCj88jyZpXXaaZN", "question": "The people with the board here are members of one what?", "choices": ["family", "gang", "shark organization", "mob"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270276, "question_id": "bkPQxqEJH3gkTGMxaDfydi", "question": "What show is streaming on the phone?", "choices": ["american dad", "family guy", "south park", "simpsons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000270276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17007, "question_id": "boCEJ5uTCSV24XKAvY9YGU", "question": "What number comes after the number on the hydrant?", "choices": ["83", "66", "75", "92"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017007.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443267, "question_id": "boXx6uFkKEEb6kvsnYtcbE", "question": "What sandwich does this vehicle share a name with?", "choices": ["submarine", "double decker", "french dip", "reuben"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390139, "question_id": "bodZWLiqpaHThveA4Pe8Rg", "question": "What type of paper is shown?", "choices": ["toilet", "towel", "loose leaf", "construction"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390139.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557715, "question_id": "bpRqZeajE7duhsXMQC372o", "question": "What is he trying to change?", "choices": ["mood", "carpets", "food", "channel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557715.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337482, "question_id": "bqMvjuPuwyk8mHmQWTF9bm", "question": "The man has what in his hands?", "choices": ["racquet", "staff", "globe", "map"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133427, "question_id": "bqVBAPBkL5sA5JKmhJ9YGu", "question": "What type of items are shown?", "choices": ["umbrella", "fan", "t shirt", "dress"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81769, "question_id": "bqvsp2eb75YMznPydfXEWL", "question": "Why does the giraffe spread its front legs apart?", "choices": ["tired legs", "reach water", "injury", "stretching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34881, "question_id": "brj5rWmvvZCF8dA29CcTqq", "question": "How was her hat made?", "choices": ["taped", "draped", "sewed", "knitted"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000034881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357766, "question_id": "brnfZhsbb6fiTJ5hWLuV2N", "question": "How is this food portioned to be served?", "choices": ["shredded", "cubed", "diced", "sliced"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224972, "question_id": "bsBZ5PS5yWF3GBgP495GSq", "question": "Why is the man using his thumb?", "choices": ["to call", "to game", "to print", "to talk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224972.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550731, "question_id": "bsnYUTiSeg2mRXE2CAze2q", "question": "What store sells these items?", "choices": ["office max", "mcdonald's", "best buy", "waldbaum's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550731.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271074, "question_id": "bt3oVQCnoQnmejQJ5tYP7C", "question": "What type of animal is this?", "choices": ["service", "reptile", "domestic", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271074.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161068, "question_id": "btHo7iSavrNMgJzLTVKE4Q", "question": "How is one item different from all of the others?", "choices": ["upside down", "alcoholic", "rotten", "broken"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161068.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96448, "question_id": "btY5o9oYTtZ5JM5JeUYEKh", "question": "What does a person usually need to get on the red structure?", "choices": ["passport", "ticket", "helmet", "ankh"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096448.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559917, "question_id": "bu3QACMA8rKpeytJZvJLyq", "question": "What kind of phenomena is behind the man?", "choices": ["shark", "tsunami", "wave", "typhoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559917.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439211, "question_id": "buVrDB2dhQL4TbCAxi7qDV", "question": "The animal is crossing what?", "choices": ["legs", "eyes", "quills", "horns"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350490, "question_id": "bucay5KJvMoxX7msMYYmXS", "question": "What did she do with the frisbee?", "choices": ["found it", "hid it", "caught it", "tossed it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350490.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 70419, "question_id": "bv657ApjQRK4eeet3qvAqx", "question": "What is the name of the bus company?", "choices": ["bus", "bctransit", "bc", "transit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000070419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358272, "question_id": "bvGdjvHGxPvqwLmTYrxUfE", "question": "The item hung in shower here is meant to amuse whom?", "choices": ["guests only", "mother", "child", "father"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358272.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 166064, "question_id": "bvpZoRVF6KrLSBXDfD3Qef", "question": "Why is his hand on the ground?", "choices": ["pushing off", "showing off", "stop falling", "is falling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000166064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415844, "question_id": "bvrhh6gYXPeb9qZb2Gjwct", "question": "What is the surfboard made of?", "choices": ["foam", "wood", "fiberglass", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415844.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234802, "question_id": "bxjZfCMdCqoVTDqkwf6fMG", "question": "This college offers courses of which major?", "choices": ["arts", "business", "technology", "science"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000234802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202314, "question_id": "bz5hsQQpJ88ThYpHa7aR6i", "question": "What are the men pointing at the skateboarder in mid air?", "choices": ["lasers", "cameras", "swords", "phones"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202314.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190479, "question_id": "bzDWdbeNsRNtm9DGEsZnEm", "question": "Drinking from where visible here can cause the horse problems because of salt?", "choices": ["hills", "canteen", "ocean", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288370, "question_id": "bzfBnR8M5SycMhpAHMkyNV", "question": "What is on top of the woman?", "choices": ["box", "baby", "covers", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288370.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505166, "question_id": "bzmUfBsdL5x5jrtEBB9iyq", "question": "How is the mirror in the motorcycle above called?", "choices": ["display mirror", "none", "side mirror", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505166.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407014, "question_id": "bzy5jNeyQqWDHWeRqyyEow", "question": "What is comparable to the height of these adult animals?", "choices": ["toddler", "basketball hoop", "refrigerator", "mouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000407014.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252142, "question_id": "c2WdGEu8MxiWvRdTvxgBBx", "question": "What is located near the bear?", "choices": ["anchor", "watermelon", "orange slice", "mermaid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252142.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84343, "question_id": "c2YYWdDggq7nkiWBoofrbe", "question": "What type of transportation does this sign direct?", "choices": ["air", "road", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66777, "question_id": "c3qSHT8XqACaWPCHESwwFS", "question": "What does the person on the left have on?", "choices": ["rain coat", "fedora", "ballet shoes", "skis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508428, "question_id": "c4UNaLPEoRaYGyaeXGBrJL", "question": "What time is it?", "choices": ["1138 pm", "1138 am", "757 pm", "757 am"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508428.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365335, "question_id": "c5ReWxhbLMLDrNKnX8a2VD", "question": "What activity are the humans performing right this instant?", "choices": ["surfing", "walking", "boarding", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365335.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501040, "question_id": "c5awYQMtfqwUa3Q2ZKtMvT", "question": "What other sport uses similar apparatus to this?", "choices": ["snowboarding", "tennis", "football", "skiing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501040.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123442, "question_id": "c643brLAyQ2XMo72p9fSGb", "question": "The skateboarders arm position is going to help him with what?", "choices": ["spinning", "landing", "tricks", "launching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123442.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196389, "question_id": "c6dFxgVd5dZtoevFA7jRqL", "question": "What type of flooring is visible beyond the sleeping cat?", "choices": ["sub-flooring", "carpet", "planks", "marble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196389.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342282, "question_id": "c6qJh6oj7fEcFqLLZLnCG5", "question": "What was the original name of this airline?", "choices": ["virgin pacific", "virgin sydney", "virgin blue", "virgin yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177761, "question_id": "c6yVnH5DTwX6zdhUY7QYc6", "question": "Before seeing these animals what will alert you as to their impending arrival?", "choices": ["nothing", "dogs", "smell", "bell sound"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177761.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319656, "question_id": "c7836fRkKbqvAknH3L4Fyw", "question": "What is the pig doing with the chicken?", "choices": ["stalking", "watching", "attacking", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501141, "question_id": "c86ULaTFSoZz9EaQLhszKH", "question": "What does the item to the left of the bird look like?", "choices": ["pizza", "barrel", "worm", "house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403734, "question_id": "c8xfFo7dhYs8vTX8ZCCphU", "question": "What is on the very left of the bottom row?", "choices": ["keys", "playing cards", "egg", "book"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403734.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294427, "question_id": "c9PDzJJ29pmgKjyitfaeis", "question": "What kind of event is occurring in the far background?", "choices": ["snow", "tornado", "blizzard", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521072, "question_id": "c9SM2QCvseZqVcJz63JtFA", "question": "What animal is the creature on the left descended from?", "choices": ["tiger", "elephant", "bat", "wolf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454443, "question_id": "cA48EZv25gjbFeDjsL8Y4E", "question": "What is needed for this activity?", "choices": ["rain", "water", "sun", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439109, "question_id": "cAfvnAtrRKjD2J9tmxVyxh", "question": "Which one of these is another way to prepare the white food?", "choices": ["julienned", "scrambled", "jerked", "grilled"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439109.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409082, "question_id": "cAyuVmrf4poA6Rt4Esi3Yw", "question": "What person would most likely use these items?", "choices": ["doctor", "carpenter", "accountant", "computer programmer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409082.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63581, "question_id": "cBJDSLBzLZonLFBGTnYdZd", "question": "What is needed for this activity?", "choices": ["key", "engine", "wheels", "horn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211820, "question_id": "cBjyfoKKZjwBaj9L9Vy8wy", "question": "The boy most likely wants to be what when he grows up based on his attire?", "choices": ["dog catcher", "fire man", "baseball player", "garbage man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211820.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 425132, "question_id": "cBnTZaD3eimuq4UKX2jbnv", "question": "How is the smaller bear likely related to the larger one here?", "choices": ["offspring", "cub in-law", "mortal enemies", "prey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000425132.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224760, "question_id": "cCjE6bgkgCqtEjNLUoYPYX", "question": "From what object do the three long thin shadows on the wall on the left originate from?", "choices": ["desk", "entertainment center", "laundry rack", "floor lamp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381920, "question_id": "cE37FaTjF7xdyJmeXJhEqh", "question": "How many giraffes are in this image?", "choices": ["three", "two", "five", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381920.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418859, "question_id": "cFwySJC8Zm7Acde7iXPgTm", "question": "How do these people know each other?", "choices": ["family", "neighbors", "coworkers", "rivals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418859.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124108, "question_id": "cG5qfuQUjLgxSh4SMP8GFi", "question": "Name of this animal is what?", "choices": ["cow", "yak", "deer", "sheep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124108.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157104, "question_id": "cGzF3Zu7o7a3Qo2QdFXWXo", "question": "What common skateboard trick is most probably being performed here?", "choices": ["kickflip", "ollie", "shuvit", "nollie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 233753, "question_id": "cJ2kqaNn3RcZcH6FdBWBo7", "question": "This clock is most likely in what area?", "choices": ["baltimore", "boston", "denver", "juneau"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000233753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63034, "question_id": "cJuAVdgWqzKgEpuYvBNgzG", "question": "What is the state of the waves of the ocean?", "choices": ["high tide", "medium tide", "low tide", "tsunami"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063034.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254526, "question_id": "cKDA2GvReeaZ9b3UFqVLix", "question": "What does the hair style called?", "choices": ["quiff", "high fade", "buzz", "spike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254526.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400714, "question_id": "cLkp5oWB4yCidJGJGmLcM6", "question": "The child is what to this man?", "choices": ["stranger", "son", "sibling", "enemy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235219, "question_id": "cLomioFJZ5p2Lp8W2pTy35", "question": "Why is the room the man with the tennis racquet is in covered with green sheeting from wall to floor?", "choices": ["safety", "film production", "style", "game play"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235219.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17759, "question_id": "cM8w5Gw6jTZAzGtu7YrXs9", "question": "What continent would this animal be found in naturally?", "choices": ["south america", "north america", "europe", "africa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017759.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267097, "question_id": "cNBYf56PgUHD2uobza3YHB", "question": "What is the fork-like object decorating the skateboard called?", "choices": ["salad fork", "scepter", "spear", "trident"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267097.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96606, "question_id": "cNhDmLKDtx33izNXiDEbPj", "question": "What do customers do inside the red awninged building here?", "choices": ["log on", "lift weights", "buy clothes", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096606.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 526573, "question_id": "cPNgLTSV8xE7iewUHYUZUQ", "question": "What does the foil help do?", "choices": ["keep warm", "marinate juices", "keep clean", "keep cool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000526573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51061, "question_id": "cPdtcBkAN9WtTmXuDqjVXE", "question": "What material allows the red material to lay on top of each other?", "choices": ["wax", "honey", "glucose", "mortar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051061.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386477, "question_id": "cPiGzAgoGNmL8WqucgmEfP", "question": "How many species of flowers are seen here?", "choices": ["none", "three", "five", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386477.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139880, "question_id": "cR8bxzdJHpzZi3D4MLdVHE", "question": "What color are most letters?", "choices": ["black", "yellow", "white", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366771, "question_id": "cRCJo5vi5FYFLmVe7q3WQ2", "question": "What leavening is contained in the food shown here?", "choices": ["yeast", "beer", "milk", "wheat bran"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366771.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245745, "question_id": "cRZqioghuprejvouGPu5to", "question": "What does the company brand in the person's skateboard majorly produce?", "choices": ["beverages", "water", "paints", "cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000245745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24993, "question_id": "cS69MkCyfkas7URykJfiu2", "question": "What is the kid riding?", "choices": ["bike", "bull", "skateboard", "surfboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 538271, "question_id": "cSVKYmfFzbUTPxKkgd7jUX", "question": "What style jewelry is shown?", "choices": ["modern", "costume", "antique", "electronic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000538271.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164182, "question_id": "cSf9CQsnbexXYLYasB83Du", "question": "Who likely owns these stuff?", "choices": ["boy", "man", "woman", "girl"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000164182.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373121, "question_id": "cTCW85tC4sKMLumzW3j6GT", "question": "What are the long red strips?", "choices": ["tomatoes", "red pepper", "jalapeno", "radish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373121.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317421, "question_id": "cTHYmBLK2kXNpBPbrteKJH", "question": "What do we know about this donut?", "choices": ["old", "last one", "stale", "no good"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317421.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53699, "question_id": "cTua3MGbtFf7yc3pg4PVZ9", "question": "What year was the company whose name appears on this vehicle founded?", "choices": ["1955", "1962", "1971", "1923"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053699.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253128, "question_id": "cUfAYw97pVtwFeGGgVXG2K", "question": "Where is this animal located?", "choices": ["circus", "yard", "room", "zoo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000253128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520084, "question_id": "cUqBLfuvk2upVUqnohsJXy", "question": "What language it is?", "choices": ["french", "malay", "zulu", "finnish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373083, "question_id": "cV3HLaDwutL2sTHvKcLxG5", "question": "Which group has the right amount of members so that each would each get one surfboard?", "choices": ["jackson five", "beatles", "spice girls", "three amigos"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000373083.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366857, "question_id": "cWuZd9Ufe7aYkywmoUJYXq", "question": "Name of this aircraft is what?", "choices": ["biplane", "flight", "polar plane", "helicopter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366857.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581342, "question_id": "cXV4yVQBJUL6LNQgpX3nmX", "question": "What Klean drink might be enjoyed in a sealed container here?", "choices": ["shake", "ice", "milk", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581342.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556981, "question_id": "cYUBZLgUsauib3vQSuGG9f", "question": "What is the woman wearing?", "choices": ["fedora", "coat", "necklace", "armor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556981.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61387, "question_id": "cYWzikqTJReyTrhJ3tvJrd", "question": "The advertisement here is selling what product?", "choices": ["hair products", "nothing", "phones", "hats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230897, "question_id": "cZvyVXu2GAcC8JHvme4hvn", "question": "What gait does this horse appear to be in?", "choices": ["walk", "trot", "gallop", "standstill"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230897.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297142, "question_id": "ca38CsFwcB6PskGFv82o2r", "question": "If these animals wanted to get wet how would they achieve it?", "choices": ["dive", "go up", "lay down", "fly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297142.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71782, "question_id": "caQRPhYQAUAeK78it7GpmQ", "question": "What type of cargo is the blue train carrying?", "choices": ["chemicals", "coal", "grain", "sightseeing passengers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071782.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57711, "question_id": "caX6GFMsn4F9RfQgpeS6QG", "question": "What color handle does the screwdriver have?", "choices": ["green", "blue", "yellow", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000057711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 181335, "question_id": "cavdYoYkqSFVxWgXMx4FZ7", "question": "What weather event looks like it occurred in this location recently?", "choices": ["snow", "tornado", "rain", "hurricane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000181335.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 220461, "question_id": "cayfhVvVSSdt6nvnaNHzuj", "question": "What is the foot of this animal called?", "choices": ["hoof", "paw", "web", "barnacle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000220461.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367170, "question_id": "cbNxBDk7RYjniM92WKpyYj", "question": "The large red and blue item appears to be in the shape of what letter?", "choices": ["d", "w", "q", "m"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374943, "question_id": "cdktTDbLGq2NVNhaAmEQka", "question": "What needlework is the shiniest golden in color?", "choices": ["minor stitching", "embroidery", "dye", "ink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360492, "question_id": "cfCzkyeNS62d6pGGjBbtNw", "question": "The dog is dressed as if it will participate in what activity?", "choices": ["snorkeling", "skydiving", "surfing", "volleyball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000360492.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47689, "question_id": "cfyCasxWRtzUe9BbJKQBxj", "question": "Why is nobody visible here?", "choices": ["model homes", "hiding", "noon time", "quiet neighborhood"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000047689.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448834, "question_id": "cgTexCcCoXFyfNyztAJRKx", "question": "What is the shape on the outside of this clock?", "choices": ["cylindrical", "square", "circle", "rectangle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448834.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371386, "question_id": "cgdnSRPd3kRryspMzd7UgQ", "question": "Which one of these metals is often used to make these labels?", "choices": ["bronze", "gold", "copper", "aluminum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371386.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508794, "question_id": "ch4PByQ5H2KtHpHVuAGPsn", "question": "The appliance will do what to bread?", "choices": ["chop", "toast", "bake", "slice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234067, "question_id": "ci73XXPY4ufwcMtgRzLjW4", "question": "What type of flooring pattern is shown?", "choices": ["floral", "hardwood", "animal print", "checkerboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000234067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177556, "question_id": "ciQfgQ4qbSgB5Wnw2D5FXe", "question": "What is kite string actually called?", "choices": ["kite line", "kite thread", "kite wire", "kite string"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177556.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115049, "question_id": "cjKpmBij98txU2dEsNbttZ", "question": "What kind of object is directly behind the flowers when looking away from the photographer?", "choices": ["tree", "curtain", "window", "fence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115049.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171462, "question_id": "cjuzvRPZWrF98fQyWyGF8y", "question": "What sport is represented on the cake?", "choices": ["football", "hockey", "baseball", "badminton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171462.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325376, "question_id": "cmQevVbPNhMZjK9PXRXRMx", "question": "What is the food habit of bear?", "choices": ["herbivore", "none", "carnivore", "omnivore"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000325376.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 327035, "question_id": "cn8wYiYvsS6nCf5SV3N4WC", "question": "If the cat moved further up toward the pillows how would it feel?", "choices": ["warmer", "sicker", "cooler", "more excited"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000327035.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7266, "question_id": "cnXxDL7ZRbTRwcHKMaGjuz", "question": "What type of area is shown?", "choices": ["desert", "commercial", "residential", "forest"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000007266.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131962, "question_id": "coDiREFKAfETFFJE88BXfd", "question": "What may you not do when heading up this alley in this direction?", "choices": ["u turn", "stand", "drive", "walk"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000131962.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319149, "question_id": "coRVDTyBjg7EWF2NPwErKH", "question": "The bottom of the left most unit here does what to liquid?", "choices": ["purifies it", "freezes", "maintains it", "heats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319149.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324854, "question_id": "coxBzfkSKnCt8JaPsbw2bP", "question": "What is the green item on the left?", "choices": ["giant", "tree", "banana", "lime"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000324854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59800, "question_id": "cpSvoTqJkH6qWMgf6togaX", "question": "How many stop signs would you expect to find at this intersection?", "choices": ["one", "four", "three", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059800.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 239207, "question_id": "cr5nUy8JSBzrtcAw93Nm2R", "question": "What is the profession of the person that made this?", "choices": ["baker", "artist", "chef", "welder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000239207.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10873, "question_id": "crQeW7jSuX7nES7TWz3XbC", "question": "Why does the woman have her arms out?", "choices": ["balance", "catch", "gesture", "wave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010873.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283807, "question_id": "crVx8JvNUiy2TdCMSa4czs", "question": "This makeup look is for a holiday in what culture?", "choices": ["italian", "mexican", "french", "somalian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283807.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198802, "question_id": "csWNgx4eHk74CigojkojNV", "question": "The cows here belong to an owner of what?", "choices": ["car lot", "truck garden", "nothing", "dairy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385613, "question_id": "csi2hzWD3y7dP4hmM32aXY", "question": "What is original color of Zebra's stripe?", "choices": ["grey", "brown", "black", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385613.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341599, "question_id": "ctTXnniQRvXR2ZDmsAseNt", "question": "What type of company is branded on this plane?", "choices": ["clothing brand", "delivery/ shipping", "cars manufacturer", "premium airline"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000341599.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336855, "question_id": "ctVtr8J5YmpNL6LdBmsQQF", "question": "What are the zebras doing?", "choices": ["foraging", "resting", "drinking", "leaving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336855.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 274648, "question_id": "cu498PBumXFLrgYLzFDuYr", "question": "What is this type of plane commonly called?", "choices": ["tripod", "triplane", "bipod", "biplane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000274648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412016, "question_id": "cv7F6c5EALbPnjNcWFA9TR", "question": "Where is the person?", "choices": ["outside", "restaurant", "theater", "store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412016.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397924, "question_id": "cvLCEtqBsrK9Vv594tXB52", "question": "What is on the stick in the babies mouth here?", "choices": ["milk dud", "lollipop", "bristles", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397924.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575234, "question_id": "cvTgZCxFU3Q3ewkH9YkhnT", "question": "What is this land used for?", "choices": ["farming", "construction", "sports", "pasture"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575234.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154548, "question_id": "cwJLvWQTmaSmJ2Bqh8EMtv", "question": "Items made from what are turned on the item on the right?", "choices": ["rocks", "wood", "sand", "vinyl"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154548.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 502544, "question_id": "cxdSCQVCMQSWMdSYD35Pjn", "question": "Which abbreviation can be used to describe the stack of cards?", "choices": ["rsvp", "brb", "vs", "lb"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000502544.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462600, "question_id": "cyw2GNDYsqJLsdkh2qXGq2", "question": "What wraps around the tree?", "choices": ["hay", "bench", "vine", "fruit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 18315, "question_id": "czEd5pyPBPDiKUBj3rLDTx", "question": "By what method did this animal gain this resting spot?", "choices": ["taxi", "walking", "flying", "crawling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000018315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539192, "question_id": "czvhhsgT5dsw7JzUF2bzDb", "question": "What is this cat trying to do?", "choices": ["eat", "hide", "attack", "sleep"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000539192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484118, "question_id": "czwgZsvbbvQUQWei5gjAuB", "question": "Which part of her skin is visible?", "choices": ["fingers", "feet", "nose", "eyes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000484118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461312, "question_id": "d2DxAsYzJz9s7TxXxCEf7d", "question": "What number is the train?", "choices": ["five", "four", "44", "41"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461312.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98542, "question_id": "d74deT9Jzath2ACiJ3yNMr", "question": "Why are the zebras lighter in some areas and darker in others?", "choices": ["stained", "artificial light", "sunlight", "painted"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138427, "question_id": "d7Gwuv7saQQw7PfCtfGDdF", "question": "What is the air temperature in the area around the traffic light?", "choices": ["hot", "warm", "mild", "freezing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111464, "question_id": "d9NgUK7w6hL7qgQ3HvvqHo", "question": "What kind of energy is produced on the hill side?", "choices": ["solar power", "nuclear power", "hydro power", "wind power"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 348276, "question_id": "dAywwZrGNKHtE66tVwunBn", "question": "What character is being depicted on the skis?", "choices": ["lego man", "mario", "transformer", "batman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000348276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364682, "question_id": "dBJB7ZbXR5gv8m7S7cfdz3", "question": "What type of transportation is shown?", "choices": ["rail", "water", "air", "land"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000364682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167177, "question_id": "dBUnbNL3QbMeiyAEuLHE4B", "question": "What is the white animal on top of?", "choices": ["grass", "box", "nest", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000167177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570475, "question_id": "dCymzYN4qUxDt99JC8KBRK", "question": "What type of ethnic food came in the box?", "choices": ["italian", "english", "french", "german"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114031, "question_id": "dDErnyLE6BHkZwxVubouFu", "question": "What is in the window?", "choices": ["dog", "mannequin", "clock", "woman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114031.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45250, "question_id": "dEMQpKHNZxxvp4WEonfbcQ", "question": "What happened to windows that were originally here?", "choices": ["stolen", "nothing", "inside", "broken"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045250.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214613, "question_id": "dEXGjq4KhifeFfCswWWSA8", "question": "What placed the items inside this mesh?", "choices": ["home owner", "bird traders", "bird", "trapper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214613.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477558, "question_id": "dHCXeHutic2rZKoHaEPqJn", "question": "What does the guy in the upper right off the gate do for a living?", "choices": ["football player", "baseball player", "doorman", "photograph"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477558.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334528, "question_id": "dJSqx3VafpUWGvGS7XEWz8", "question": "Where is this bathroom located?", "choices": ["school", "office", "home", "library"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334528.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231482, "question_id": "dJoCv8jydW6Zt6XbRY22fC", "question": "What type of outfit is the woman wearing?", "choices": ["skirt", "bikini", "dress", "wet suit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 282939, "question_id": "dJvCVH6JTvEdfDGt6gzyoC", "question": "What is the vertical stain beneath the clock?", "choices": ["paint", "corrosion", "popcorn", "blood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000282939.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297057, "question_id": "dK2gzdTeeovticSATMx3aK", "question": "What is the primary purpose of the room?", "choices": ["recreation", "sleeping", "storage", "business"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82712, "question_id": "dKBGunPQCP9UsozGX49brJ", "question": "There is a sticker on here which resembles the logo of which kitchen brand?", "choices": ["rubbermaid", "foodsaver", "corningware", "tupperware"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082712.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567799, "question_id": "dLHyRLMJHMbMU6XmZhcdfC", "question": "What do the items stacked up near the hydrant look like?", "choices": ["grapes", "apples", "pears", "carrots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567799.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49703, "question_id": "dLbb9uHS3AqXjTnfZCzHW5", "question": "What is the most likely age of the person making a joke out of the fire hydrant?", "choices": ["19", "22", "15", "32"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000049703.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384880, "question_id": "dLprcHZuABPQARQn7JEfkp", "question": "What is the fleet of Vectisblue?", "choices": ["105", "115", "180", "110"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511773, "question_id": "dNMi7KxB57mQDxZtpbdSmB", "question": "What species is most visible here?", "choices": ["canine", "rodent", "human", "feline"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511773.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466540, "question_id": "dNbNUBt3E9yh7tZC7C2PdZ", "question": "What is the best place in this room to put a television?", "choices": ["back", "left", "front", "right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466540.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 116302, "question_id": "dNzSPwPajtivBBfwaSJ3Jw", "question": "In what century was the black item on the right invented?", "choices": ["fourteenth", "twenty first", "tenth", "nineteenth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000116302.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135581, "question_id": "dQ6Fo4LDGLXQh58XEisVEx", "question": "What is near the computer?", "choices": ["coffee can", "soap bar", "cat", "wire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541389, "question_id": "dQ8rAPRibvBbTdeFv6HVxr", "question": "What type of information is provided by this object?", "choices": ["date", "brand", "temperature", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541389.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146527, "question_id": "dQV9wgn9ceXaodwmuefBv4", "question": "What is the air temperature in the area surrounding the clock tower?", "choices": ["freezing", "hot", "cool", "warm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146527.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89842, "question_id": "dSSGPDXK5wsyADqEkrGA2x", "question": "What is the size of this pizza called?", "choices": ["medium", "large", "personal", "extra large"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089842.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270187, "question_id": "dSzmqNqFdkkQQMaA5aHxqA", "question": "What type of mouse is seen here?", "choices": ["wired", "stuffed", "animal", "wireless"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000270187.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105413, "question_id": "dTHwekjJ7DtKRRDYigbmfg", "question": "What sound does this animal make?", "choices": ["woof", "moo", "baa", "hiss"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105413.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32216, "question_id": "dTUtjJkserH4XHrpRE6BJY", "question": "What type of area are these animals located in?", "choices": ["coast", "mountain", "forest", "field"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032216.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195442, "question_id": "dTXKXwqZov3mEszVCyqzs7", "question": "What is the man wearing to hide his fingerprints?", "choices": ["rings", "fake nails", "mittens", "gloves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195442.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10873, "question_id": "dTice7mj3anW2vcEMM5UaE", "question": "What direction is the sun in based on their shadows?", "choices": ["picture's right", "picture's background", "picture's left", "picture's foreground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010873.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570161, "question_id": "dV6LXUkSqd9QcWU9CwdEQq", "question": "What company is known for selling these food items?", "choices": ["mcdonald's", "chipotle", "taco bell", "nathan's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272106, "question_id": "dV8MGSrHbXKBNP5YNE4eTF", "question": "The 10.8 figure is a unit of what?", "choices": ["time", "currency", "weight", "energy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272106.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 307537, "question_id": "dVm3GZzZGZBPVoTbUborwG", "question": "What can be found here?", "choices": ["mouse", "soda bottle", "cloth", "helmet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000307537.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277733, "question_id": "dWD3AAgMCpi2WMkrRyhrkL", "question": "What color is the kitty cat?", "choices": ["orange", "black", "green", "blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277733.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 526812, "question_id": "dWSygva85yDpoGXt6LGSHf", "question": "Why is the door jamb rusty?", "choices": ["bad paint", "lacks maintenance", "is feature", "exposed salt"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000526812.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355892, "question_id": "dWcq9rtVPR2Sa4guWHhXg2", "question": "What was this graffiti likely done on?", "choices": ["bus", "wall", "ground", "train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153273, "question_id": "dWr4tzdnoScf5owoEE4xkn", "question": "Where are the zebras standing?", "choices": ["behind trees", "on pavement", "near people", "in grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153273.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546028, "question_id": "dYDGSamjSEUVbA7hNDVXkW", "question": "What kind of room is displayed on the above picture?", "choices": ["bed room", "living room", "kitchen", "wash room"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546028.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 388589, "question_id": "dYnxBqNy7x2yMbeAnYoagc", "question": "What kind of business is this airline in?", "choices": ["fire assistance", "passengers", "military", "cargo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000388589.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334608, "question_id": "daNVJ9q92HDsa6vunhiZZu", "question": "What was the first name of the woman who first cultivated this type of apple?", "choices": ["maria", "susan", "stephanie", "rachel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146995, "question_id": "dbV7S9bqKLjS8c95B4Yi5z", "question": "What is the capital city of this airlines home country?", "choices": ["riga", "helsinki", "warsaw", "moscow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146995.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533306, "question_id": "dcZ8dKWHETcEEu2cNqVYHE", "question": "The air temperature in the woods where the woman is riding the horse is what?", "choices": ["warm", "hot", "freezing", "cool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 33437, "question_id": "ddL7dHss3bvXKSeu8HNei6", "question": "What time of day is it?", "choices": ["11 am", "sunrise", "2 pm", "mid day"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000033437.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524870, "question_id": "deBmHcinofgQmbUgDdyARq", "question": "What are the giraffes eating?", "choices": ["cheese", "tree", "meat", "fruit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524870.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226129, "question_id": "deiMWN7i9d7fwA7WWas49w", "question": "What is the dog eating?", "choices": ["fish", "chicken", "steak", "stuffed animal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226129.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296237, "question_id": "dfg5NVHjGkL9E399dFLK2Q", "question": "What is needed for this activity?", "choices": ["sand", "snow", "water", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296783, "question_id": "dfrMpGTwybE6iV9nm7YE3W", "question": "Why are the two dogs opening their mouths?", "choices": ["breathe", "relax", "happiness", "fun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170197, "question_id": "dfzTADcSFPxwjLXv3pUtBz", "question": "What is the skier doing at the moment?", "choices": ["stretching", "observing", "skiing", "posing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170197.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111464, "question_id": "diuExVBrj8iMARgrCVztNn", "question": "The items on the hill top produce what?", "choices": ["electricity", "music", "bird nests", "safe barrier"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 67649, "question_id": "dkd4jL86jLzWBb7Ru79c4X", "question": "What type of candy is in the pink wrapper on the left?", "choices": ["gummy", "licorice", "sour", "chocolate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000067649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54044, "question_id": "dmb5oNqmjGGWiakQSw2VzT", "question": "What does 1155 indicate?", "choices": ["score", "current time", "temperature", "time remaining"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5239, "question_id": "dn3ynjtUJWmW3RHEKumysq", "question": "Which giraffe probably runs the slowest?", "choices": ["loner", "second tallest", "smallest", "tallest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005239.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 13600, "question_id": "dnSwxdBp4uTUnGQRYCzSXH", "question": "Why is he wearing a suit?", "choices": ["warmth", "uniform", "costume", "business"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000013600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267107, "question_id": "dnXvSBMyj3sFi8fPW7uhSq", "question": "What sort of facility is this case sitting in?", "choices": ["nursery", "depository", "hospital", "library"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267107.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 33433, "question_id": "dnYbGkVRJvEUoFHCDogthR", "question": "What is the blue object used for?", "choices": ["telling time", "clothing fabric", "symbolizing love", "swatting flies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000033433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536978, "question_id": "dnstzcz4nNKtuVGs3BFAE8", "question": "What kind of light illuminates this photo?", "choices": ["black light", "uv light", "night light", "sunlight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536978.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61691, "question_id": "do2NYet6AWsNiwCN6KVfsk", "question": "What is the silver circle on the ground used to do?", "choices": ["dispense soap", "store pills", "clean shoes", "drain water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395493, "question_id": "dp6ALUejPugqpfRzVGJBYE", "question": "What is this photo taken for?", "choices": ["display", "learning", "nothing", "fun"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000395493.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339899, "question_id": "dpqUfhtvTikfn65no2hyV7", "question": "What is located between the man and the dog?", "choices": ["leash", "car", "cat", "cow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339899.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 323643, "question_id": "dq7fwMsdgbZLJSWeg4HU9r", "question": "What is he holding out to keep his balance?", "choices": ["arms", "legs", "board", "fabric"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000323643.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41344, "question_id": "dquQcviD4AeoL9bjWMYNYS", "question": "What is the child wearing?", "choices": ["boa", "feathers", "fedora", "goggles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000041344.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188783, "question_id": "drSJ9EJ6sVEYwuxiBV5jP3", "question": "What animal was used to make this meal?", "choices": ["shrimp", "zebra", "pig", "shark"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554831, "question_id": "drvqUHLzd3fkVD5TUPSg6H", "question": "What is the purpose of the words on the surfboard?", "choices": ["advertising products", "entertainment", "artwork", "activism"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000554831.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493909, "question_id": "ds6aMkTtV3bP2oRhEB4SJq", "question": "What are the animals standing in?", "choices": ["water", "rocks", "snow", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493909.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291859, "question_id": "dsH3Ph4qgJdri6Y978V8zK", "question": "Where is this man situated at?", "choices": ["desert", "residential zone", "savanna", "urban area"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291859.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414928, "question_id": "dt9sk3Nx6fSuM39XmeVa8c", "question": "What sports equipment is on the grass?", "choices": ["baseball", "basketball", "soccer", "football"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414928.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 78722, "question_id": "dtM48zbNHz5ZaogWKpLZmj", "question": "What does the tennis player display on his head and wrists?", "choices": ["sweat bands", "watch/cap", "tattoos", "advertisements"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000078722.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262570, "question_id": "dtMruJoo8c3BY77UzpdgFM", "question": "What weather allows the person to participate in this sport?", "choices": ["sunny", "windy", "rainy", "snowy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285036, "question_id": "dtkcEngd5BvLJ3KcCU3mwq", "question": "How does this aircraft generate thrust?", "choices": ["propeller", "floats", "turbine", "jet propulsion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462137, "question_id": "duv97NQCvxsQGx3355hCRX", "question": "Why are the birds here?", "choices": ["feeder seeds", "finding prey", "to hide", "to rest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462137.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215352, "question_id": "dvHy2hbNHDmiCg22vSfe8y", "question": "What is the animal in the window most likely looking at?", "choices": ["birds", "elephants", "alligators", "giraffes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000215352.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250144, "question_id": "dx2yvczeox8xh45bKZkvvZ", "question": "Which animal shown here is more likely to eat the other?", "choices": ["elephant", "none", "donkey", "zebra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000250144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560299, "question_id": "dyogw8qErUBXhzVBxiXUwV", "question": "What is causing the change in wall color?", "choices": ["bad paint", "sun glare", "camera flash", "lamp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560299.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559673, "question_id": "dzB4tCaN3AtgwvX3Axjsx7", "question": "Why is the banana placed in a green container?", "choices": ["to sell", "to peel", "to paint", "to carry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559673.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91727, "question_id": "e2LxW5cjaRzcxu4AgQu387", "question": "What is the action the man is attempting here called?", "choices": ["volley", "whip", "smash", "serve"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091727.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229086, "question_id": "e2RDdrY5Ao2yEYb37KN2L4", "question": "In which hand does this person hold the item that can provide energy to the other item?", "choices": ["both", "right", "none", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229086.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542588, "question_id": "e2WXG2JADVEdq3N8RgcVnj", "question": "What animal usually lives here?", "choices": ["dog", "fish", "cat", "tiger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542588.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379851, "question_id": "e4vgGrmA7k4zYrFW52YJHm", "question": "What is touching the skateboard?", "choices": ["dress shoe", "whisker", "paw", "sneaker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379851.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438007, "question_id": "e4vuFq86txECBXu8qAAAan", "question": "What are the blue things growing on the plant?", "choices": ["wheat", "apples", "danishes", "berries"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438007.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158496, "question_id": "e54emHEXzPkirGxtSssno9", "question": "What appears to be the dog's job?", "choices": ["police duty", "guard duty", "retrieving", "foraging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158496.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256187, "question_id": "e57gPjaxMccz3tiGdmDAFB", "question": "What is the dog holding with it's front foot?", "choices": ["bowl", "rope toy", "frisbee", "bone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000256187.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393066, "question_id": "e5bY6uXbwQyD7AVQUpRuvF", "question": "What kind of pet does the man have?", "choices": ["dog", "cat", "bird", "rabbit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490698, "question_id": "e5bhAQwzDd6UK9tuw7gaPC", "question": "The long items are usually eating with what other type of food?", "choices": ["fries", "soup", "porridge", "fried rice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490698.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377435, "question_id": "e5uQw2dQVgrWbX8cQWLwrw", "question": "How long has the little one likely been able to walk?", "choices": ["few decades", "few centuries", "few years", "few months"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 78698, "question_id": "e5v4zRmbTpZ8uxtfKfDXwe", "question": "What is this place?", "choices": ["prison", "shopping mall", "palace", "train station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000078698.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171956, "question_id": "e6TjVLhDvRKf2vT6JmrqUm", "question": "Who is from the country that has a similar name to the second word on the sign on the right?", "choices": ["desiree nosbusch", "marc antony", "benedict arnold", "jason bateman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171956.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342329, "question_id": "e8X6BzDdXfKkskw7SXLinh", "question": "What pattern is the tablecloth on the table the boy is eating at?", "choices": ["stripes", "hounds tooth", "plaid", "polka dot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49568, "question_id": "e8feAYZdEMoUfdzLygiHWg", "question": "What does the number on the top of the bus tell riders?", "choices": ["cost", "route number", "time driving", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049568.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443151, "question_id": "e8hq3zb5yZ8f5GxDrrogLd", "question": "What number does the larger cow have on their tag?", "choices": ["695", "702", "803", "998"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569690, "question_id": "e8kaRvMghyTjZMXY7WLT8i", "question": "What part of the man is touching the car door?", "choices": ["nose", "toe", "eye", "hand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569690.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229693, "question_id": "eB2XP7vZHrMFmd346GfLM8", "question": "Why has he man covered his head?", "choices": ["religion", "protection", "fashion", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229693.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559947, "question_id": "eCJ6zcjWAKyTkadV8ve8Kx", "question": "Which one of these people might perform the hand movement shown here?", "choices": ["traffic cop", "carpenter", "pilot", "nurse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119471, "question_id": "eDeWQANQ7WK2bZbbsjW9bX", "question": "What are of a house could this be?", "choices": ["bedroom", "attic", "kitchen", "linen closet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119471.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510338, "question_id": "eDvj3qmQzkdZTzSMm5GHvv", "question": "What do the rainbow like flag on the street light pole belong to which organization?", "choices": ["un", "ymca", "red cross", "lgbtq"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510338.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470131, "question_id": "eDyzz4RqeiWVGZGNtzmueZ", "question": "What was used to write on the cookies?", "choices": ["edible ink", "sharpie", "watercolor", "paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470131.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529406, "question_id": "eEdtE9224tdMkmcbcksnVq", "question": "Why is the water rolling?", "choices": ["wind", "sun", "man", "tides"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529406.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491331, "question_id": "eFWKfpEsLPVT7Cxhn3Fv2R", "question": "What purpose do they probably use this bus for?", "choices": ["business trips", "vacations", "transporting prisoners", "field trips"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000491331.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320408, "question_id": "eFwggAVMLR6o8AgPDFyhEM", "question": "What is the general age group of the person who most frequently uses this bathroom?", "choices": ["toddler", "senior citizen", "teenager", "young adult"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320408.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457333, "question_id": "eHW8TDYn3YGDTAKWsDi5rW", "question": "What type of bear is shown in the grass and flowers?", "choices": ["grizzly", "brown", "polar", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457333.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389794, "question_id": "eJKw73a2DpU2CtDnnDVgnS", "question": "Which one of these times would this meal most likely be eaten?", "choices": ["noon", "6 pm", "8 am", "midnight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29899, "question_id": "eJTZW4gPicE6zLjC34aPPf", "question": "What kind of fence is the dog behind?", "choices": ["spit rail", "plastic", "picket", "chain link"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029899.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559947, "question_id": "eLCrWi8gsHzD8QAdnG9jPH", "question": "What will be able to proceed for another 5 seconds?", "choices": ["emergency vehicles", "pedestrians", "bikes", "cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304224, "question_id": "eLZRkLX8o8bzVhaEUCjZUj", "question": "What were the sign installers afraid might happen?", "choices": ["robbery", "drowning", "assault", "car accident"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304224.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32690, "question_id": "eLdJpGNojsfDC6MhCiVprG", "question": "What activity is this person engaging in?", "choices": ["travelling", "sightseeing", "horse riding", "patrolling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032690.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16159, "question_id": "eMQYLWFN7br9FoijXt3h4q", "question": "What might happen to your vehicle if you park here at 830 am on Thursday?", "choices": ["accident", "nothing", "cash reward", "towed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000016159.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415036, "question_id": "eMvRTmVv5qndnAj92zsaRJ", "question": "What is this person doing?", "choices": ["napping", "moving", "reading", "selling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544982, "question_id": "eP3h3YhmCre7kziAQdzV6F", "question": "What frozen treat could you create with the item shown here?", "choices": ["rootbeer float", "lemon ice", "icecream stick", "banana split"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544982.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402005, "question_id": "eP8axzFa8jhoS8ZR222aPD", "question": "To which direction is the woman above looking?", "choices": ["right", "left", "back", "front"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402005.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506192, "question_id": "eQBUgEtkgN9J5j8MA58uDq", "question": "What is flying in the air?", "choices": ["helicopter", "parachute", "plane", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476076, "question_id": "eRUM9JomANja8XTjN7yhAy", "question": "Where is this clock in relation to the photographer?", "choices": ["below", "behind", "above", "beside"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 216087, "question_id": "eSE2aDVPHzppXwK6ozHWkt", "question": "What direction are the pointy tops of the umbrellas pointing?", "choices": ["west", "east", "south", "north"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000216087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131104, "question_id": "eT7aanrmwmbLTZoVzvYEhb", "question": "Why could be a reason that the cars off the road?", "choices": ["eat lunch", "go camping", "take pictures", "refuel"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000131104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 261820, "question_id": "eTko3DVXwBPW3bKKbs5H86", "question": "The word in red immediately next to the word street is important in what discipline?", "choices": ["accounting", "rocket science", "archaeology", "computer science"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000261820.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74971, "question_id": "eUswdMU9bDqw2bZz63nz6U", "question": "What is being used to hit the ball?", "choices": ["stick", "bat", "racket", "mallet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353453, "question_id": "eVDqyPTRyy7DEBD3XCDkkd", "question": "Where does this animal usually dwell?", "choices": ["desert", "beach", "cave", "tundra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353453.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270339, "question_id": "eVXp4nVMvbsWvYjZpRsgod", "question": "Which vehicle here can hold more people?", "choices": ["bike", "airplane", "car", "boat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000270339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547729, "question_id": "eVduqfg6s5PVyoMuwYnhiW", "question": "What is the pressure of water in fire hydrant?", "choices": ["40pounds psi", "80pounds psi", "20pounds psi", "30pounds psi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547729.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189823, "question_id": "eVo2mGzXjHzsjEJtEAhRxw", "question": "Which one of these services might this business offer?", "choices": ["health screening", "professional portraits", "brake repair", "dog grooming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476558, "question_id": "eWphD8gxShVJG9QofUYrAq", "question": "What is the name of the type of top the player is wearing?", "choices": ["hoodie", "sweater", "jersey", "t-shirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476558.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333933, "question_id": "eWuy6KDi3PEh5dUsedZ2HY", "question": "What is the nickname of the president of this clock company?", "choices": ["clown", "chuck", "buzz", "jimbo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444509, "question_id": "eXAeYm7cSEyyzHZWWeHvFK", "question": "What other tool could have been used to make these small pieces?", "choices": ["saw", "spoon", "ladle", "knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444509.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 12802, "question_id": "eXDN3uonDEnirBXbbeG25h", "question": "What are the giraffes doing right now?", "choices": ["standing", "sitting", "running", "mating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000012802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557330, "question_id": "eXPVYZzPqL3EBN6qsfWK2Z", "question": "What does the man seem to be doing here?", "choices": ["eating", "announcing", "flipping", "celebrating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557330.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293517, "question_id": "eXkWLQ8NKNVVVkF2vQ9BXy", "question": "This street is also the name of what?", "choices": ["president", "continent", "state", "planet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293517.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 335538, "question_id": "eXrnFuPsq6U4ynhyTZPzAL", "question": "What profession usually wears the red item?", "choices": ["sanitation worker", "army soldier", "clown", "lawyer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000335538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485772, "question_id": "eYUunr5hsYD3sY9efHYn37", "question": "What type of location are these zebras most likely living at?", "choices": ["zoo", "wildlife area", "sanctuary", "hunters refuge"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000485772.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539549, "question_id": "eZcBNfJNRQKgAgeth7HEha", "question": "How did the umbrella most likely get opened by?", "choices": ["both cats", "permanently open", "human", "one cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539549.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91168, "question_id": "eZgpbqhceUYk2BUQKeLFmc", "question": "What kind of phone is shown?", "choices": ["pay", "landline", "rotary", "cellular"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091168.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366806, "question_id": "eZio2WijUHFmNat9ZzzNAW", "question": "What is the woman standing near?", "choices": ["cat", "truck", "luggage", "fire hydrant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204313, "question_id": "ebQMT75yYfoAmYSskB7evb", "question": "What is most likely located on one of these streets?", "choices": ["cemetery", "boat docks", "sky scraper", "military base"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204313.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91308, "question_id": "ebirWy5pvimSRe4idJZeQY", "question": "What could the animal in the picture possibly be mistaken for?", "choices": ["sun", "grass", "leaves", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142044, "question_id": "ebnnKkMvZ38uYzcgXTeddB", "question": "What are these treats called?", "choices": ["cake pops", "ambrosia", "cheesecake", "cupcakes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535734, "question_id": "ec9eys5777FeUoy87ds44T", "question": "What is the cat doing?", "choices": ["running", "diving", "eating", "watching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535734.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 116991, "question_id": "ecYYoXgELjgGyPA9uwMrdL", "question": "What is the air temperature of the park where this child is flying the kite?", "choices": ["warm", "freezing", "chilly", "cool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000116991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54213, "question_id": "ecjYJq74pBHPELpA2QdFrR", "question": "How many individuals will contribute to oaring this nearest boat?", "choices": ["one", "three", "none", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054213.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539421, "question_id": "ed8eYZgM5Xv6Q3HWHUmm55", "question": "The horses on the farm are grazing during which season?", "choices": ["winter", "fall", "spring", "summer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539421.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5217, "question_id": "eedvXkDarUS486o3kvA2B5", "question": "What group of people are renowned for their crafting of items like the one in the middle of the floor?", "choices": ["albanian", "persian", "indian", "norwegian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005217.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550884, "question_id": "eeswNugnwfcBj7K5NsXtmp", "question": "What animal is more closely related to this animal?", "choices": ["zebra", "dingo", "monkey", "snake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550884.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 1657, "question_id": "eet7WReKgPEL5XXEGJL9iK", "question": "What is the type of cleaning performed in the tiled room?", "choices": ["bathing", "showering", "heating", "swimming"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000001657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40097, "question_id": "egUdWB2VokFGJNXjzRL8zT", "question": "What kind of animal is in the sky?", "choices": ["robin", "egret", "squid", "eagle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040097.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454309, "question_id": "ehQBKEqVwWeeHVpNYXqM35", "question": "What popular board game has pieces of this colour?", "choices": ["monopoly", "chess", "risk", "cluedo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454309.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433955, "question_id": "ehrdVcmtQ5ihNxXvyJhdnK", "question": "What is the last letter on the boat?", "choices": ["z", "m", "e", "w"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433955.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339263, "question_id": "eiSgymwek34UVnGJv9Qyr9", "question": "What is the relation of the two surfers to each other?", "choices": ["married couple", "friends", "siblings", "cousins"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55718, "question_id": "eiW4tAAcpEp9KujQpVZdhA", "question": "Why are orange colored streamers used here?", "choices": ["celebrate halloween", "increased visibility", "none", "discounted pricing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055718.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265921, "question_id": "eigxa8qAu5Eh9hDaKoa8qn", "question": "What type of enclosure is being used?", "choices": ["cage", "barn", "gate", "fence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265921.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158343, "question_id": "ejd25FpugAuiH6P6J8ADTb", "question": "The animal here is meant to resemble someone who lives where?", "choices": ["everest", "milan", "equator", "north pole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52024, "question_id": "ek5TEg5rmBQWDThDWo6BDF", "question": "What should the red hatted person do?", "choices": ["stand up", "nothing", "scream", "duck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052024.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204391, "question_id": "ekrXFpEfDrkUtzpoYa4MSK", "question": "What is this person most likely to shoot this animal with?", "choices": ["bb gun", "camera", "rifle", "shotgun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204391.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555902, "question_id": "em7rwdrFwiyf5KfGkz8ydz", "question": "Why is the lamb wearing plastic?", "choices": ["warmth", "identification", "safety", "fashion"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000555902.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411394, "question_id": "emfDfyKGoSQStH8CukW7WN", "question": "How are these vehicles powered?", "choices": ["gasoline", "pedaling", "sun", "battery"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000411394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503027, "question_id": "emvYvARqVJP4W2vQgwvQBd", "question": "If you want to traverse Champlain Av which way must you NOT drive?", "choices": ["circle around", "right", "none", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503027.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561476, "question_id": "enCbh8NN6Zt5VJ8SEPFnFd", "question": "What is typically found in this kind of weather?", "choices": ["beach party", "snowman", "jungle", "tiger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136488, "question_id": "enXQB9R6M6qiQ8JohgtQqU", "question": "What position is this dog in?", "choices": ["falling", "laying", "pointing", "sitting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136488.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555353, "question_id": "enkiKf83XjiL9Tjd6raDAK", "question": "What is usually done in this environment?", "choices": ["lion taming", "oasis hopping", "fishing", "sledding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542577, "question_id": "enoQ2AMMFdCB2iLPFu9gRo", "question": "What part of the trip is this?", "choices": ["beginning", "halfway through", "repacking", "end"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542577.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460638, "question_id": "eo4e7U5oSX88JwhHSJPCBJ", "question": "The arrow is pointing in what direction?", "choices": ["east", "south", "north", "west"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69664, "question_id": "eoEpokzgu3rr4vas5jNE6N", "question": "What type of animal is shown?", "choices": ["reptile", "domestic", "aquatic", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000069664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92427, "question_id": "eqHy2Gri87WmrPDhiQYJEH", "question": "What is the jug with the food inside used for?", "choices": ["drinking", "feeding", "storage", "attracting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206813, "question_id": "eqJizDi3NP9ycEf8dnsoeu", "question": "Which one of these items might be in the drawers?", "choices": ["rake", "toilet paper", "underwear", "silverware"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183056, "question_id": "er4LJMTymBbZsPvZ8zAKdF", "question": "What is the most likely holiday going off of the decor?", "choices": ["halloween", "july 4th", "thanksgiving", "christmas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61283, "question_id": "erDiaZSQbs4KgXB4mcAfFT", "question": "Where would a counter and sink like this be located?", "choices": ["hotel", "train", "hospital", "plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495831, "question_id": "ermRqifa2zw5hDeQRmSPkj", "question": "What is almost as tall as the tree?", "choices": ["giraffe", "ladder", "skyscraper", "man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495831.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 564725, "question_id": "erstZF2ivJ3C3shmNNXyYh", "question": "What is the bear on the ground trying to do?", "choices": ["run", "attack", "hide", "swim"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000564725.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537653, "question_id": "etDaoBcxXFxV45RwqDR3rU", "question": "What would cause people to avoid parking on this road?", "choices": ["bumps", "pot holes", "cracks", "parking meters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57243, "question_id": "etaNi6mF76Pd9VoW9aLeNV", "question": "What is this appliance used for?", "choices": ["cooling", "watching", "cooking", "calling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000057243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26877, "question_id": "euuaND4Wf6iGAsZNMSdCWv", "question": "Why does he have his arm out?", "choices": ["wave", "gesture", "balance", "reach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026877.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176990, "question_id": "evrg5B7TdeH4m2vZdVdsnq", "question": "From which country does this cuisine originate?", "choices": ["china", "ukraine", "greece", "brazil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176990.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352232, "question_id": "ewHgXzkCPcR4idwUs8cwK3", "question": "What part of the elephant could injure the bird?", "choices": ["tail", "trunk", "tusk", "foot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352232.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507186, "question_id": "ewRi5YzhtRtK5GhaRjvNwo", "question": "What plant has more leaves?", "choices": ["right", "top", "none", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507186.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69104, "question_id": "ewT5a27rHUN5bxtnEyQbNZ", "question": "What is the person wearing?", "choices": ["bucket", "glove", "feathers", "backpack"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000069104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 496769, "question_id": "ewYPTArW4MYDjrXertqMue", "question": "The person seen here does what?", "choices": ["descends", "ascends", "falls", "camps"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000496769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15358, "question_id": "ewqrfQK9zmnu9eYzchSkxQ", "question": "How many children have backpacks on?", "choices": ["one", "three", "five", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354208, "question_id": "eyXiiQ8nBx2K9nQqQs6YyL", "question": "Who is the cat trying to get the attention of inside the house?", "choices": ["other cat", "hamster", "dog", "owner"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54852, "question_id": "eysB2T3rKv3k8y5m88CpQg", "question": "This type of plate makes what easier?", "choices": ["cleanup", "cooking", "driving", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054852.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124475, "question_id": "ezD3XXedSTBAjdnp7AfBn5", "question": "What type of animal is shown?", "choices": ["domestic", "wild", "reptile", "aquatic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399823, "question_id": "ezQuGEiBp7UeiFeVcUe3Ab", "question": "What company has a similar item to the yellow item?", "choices": ["chiquita", "delta", "ibm", "livestrong"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155660, "question_id": "ezbAZz2AN5jVMDG95JHsVy", "question": "Who played the character whose name appears on the sign?", "choices": ["joaquin phoenix", "jennifer connelly", "eva green", "michael keaton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386775, "question_id": "eztY9zrSRCxrqdcfQHVWng", "question": "What is the weather like at the beach where the kite is flying?", "choices": ["mostly clear", "rainy", "overcast", "snowy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386775.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378505, "question_id": "f27JL5RHpCUrhG7FpaXon6", "question": "What is the child likely leaving on the stand?", "choices": ["scuff marks", "legal documents", "candy", "apples"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 278703, "question_id": "f37JWNpnnx8ya46vgASQnB", "question": "What is the person about to jump over?", "choices": ["train track", "cat", "television", "horse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000278703.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229042, "question_id": "f3VA6LV7gJPXCnrvcQwPRU", "question": "What part of the animal can be seen?", "choices": ["wings", "hoof", "fangs", "horn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229042.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345592, "question_id": "f4WPKonu87xM4jjWieUcCe", "question": "What activity takes place in the glass enclosed space shown here?", "choices": ["sunbathing", "shaving", "drying", "showering"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345592.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138266, "question_id": "f62jmj6BMMmTYqiRKN7stM", "question": "How many of the giraffe have a neck on the smaller side?", "choices": ["three", "one", "zero", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138266.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93514, "question_id": "f6vDTWYoNoXG6Tx8Cgfudp", "question": "What type of environment are they skating in?", "choices": ["coastline", "desert", "urban", "farm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373885, "question_id": "f7BkPVyM69DJt8dTpxpFgE", "question": "What is the largest feature on the white-and-grey's face?", "choices": ["nose", "teeth", "eyes", "ears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373885.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 284619, "question_id": "f7CvAjQo3SHT8TmYwCSxYX", "question": "What is the smaller zebra doing?", "choices": ["being annoying", "feeding", "fighting", "biting other"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000284619.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7188, "question_id": "f7N7LiskbE8dNbdibSMtNo", "question": "What type game does the owner of this Lap Top enjoy?", "choices": ["chess", "checkers", "mah jong", "poker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007188.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290926, "question_id": "f7UprDWy6r9Q66VLCe94tF", "question": "What famous sports star wore a number that is one number higher than the number on the plane?", "choices": ["ozzie albies", "wayne gretzky", "michael jordan", "jackie robinson"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290926.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549097, "question_id": "f835T5SknnqYK52PJZALph", "question": "What was used to produce the colors on the pots?", "choices": ["glaze", "ink", "paint", "dye"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549097.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532422, "question_id": "f8kEyexLFcwxjHgcqETrSS", "question": "What number is the small hand nearest?", "choices": ["12", "three", "11", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532422.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338958, "question_id": "f9WxX3oRcwqd4ZLN6eAuqf", "question": "What food group is there the most of?", "choices": ["grains", "dairy", "fruits", "vegetables"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000338958.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133480, "question_id": "f9gkfJ9PEZcRMmqgWvV9Bq", "question": "What species is flying these objects in the air?", "choices": ["frogs", "people", "aliens", "mechanics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133480.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352479, "question_id": "f9iUoYDxL4V2DuRcJGjqEF", "question": "Why does she have her head covered?", "choices": ["disguise", "warmth", "costume", "uniform"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495506, "question_id": "f9pisvjnhmdHdHEs5QovvY", "question": "What sound is loudest in this location at the top of the hour?", "choices": ["screams", "birds yelping", "honking", "bells"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495506.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 348745, "question_id": "fALf5riwJRfyekRTYd94E7", "question": "What does the small rectangle on the upper part of this bench represent?", "choices": ["sitting fee", "warning", "dedication", "time limit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000348745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506118, "question_id": "fB8BiUEbDkfGhhaXJHXr3u", "question": "In what state does the NFL team that uses this bird as a mascot play?", "choices": ["new york", "florida", "illinois", "arizona"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127482, "question_id": "fBmiasfZAoP62ZNzN2cXPs", "question": "This animal is called what when it is just born?", "choices": ["foal", "kitten", "puppy", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357806, "question_id": "fBpbJo4gRjwdhJNaWAWZAP", "question": "What would this baby animal be called?", "choices": ["kid", "joey", "calf", "puppy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311136, "question_id": "fCH3LyGZMUBDuw8d9Y6Jud", "question": "What kind of site is this?", "choices": ["construction", "historical", "accident", "web"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311136.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451486, "question_id": "fDbcvNWQ9pnmg8MtdyqjFJ", "question": "How many people in modern history have lived as long as the number on the bus?", "choices": ["five", "two", "none", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451486.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164210, "question_id": "fF3ceJJ3fYvkoPGSguLxQE", "question": "About how tall is the tallest animal here?", "choices": ["4.75 meters", "4.75 feet", "10 meters", "2.5 meters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164210.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 332370, "question_id": "fGSze3bn3rBBZmpq3DcEyi", "question": "What starchy food forms the base of the pizzas on the tray?", "choices": ["muffin", "dough", "bread", "bagel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000332370.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279267, "question_id": "fGxzL7ZhKNkkM6fDNpuZL3", "question": "Where is this person?", "choices": ["farm", "mountain", "forest", "beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000279267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488714, "question_id": "fHnyxPQqL7kdpn68yJiU7c", "question": "In which country was this turboprop plane stationed out of?", "choices": ["philippines", "taiwan", "japan", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574230, "question_id": "fHuAxfqbgFQqejKVpbtgTk", "question": "In which country that has yellow vehicle registration plates is this woman located?", "choices": ["israel", "netherlands", "united kingdom", "luxembourg"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000574230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289784, "question_id": "fHy7D9xT3GKk57Qk9wHwjZ", "question": "What style fencing is the bird sitting upon?", "choices": ["none", "barb wire", "picket", "decorative"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229022, "question_id": "fP5UBgzVDWmgtaJ2rgNGcS", "question": "What is the man throwing through the air?", "choices": ["ball", "disc", "puck", "bottle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229022.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 113194, "question_id": "fQ95jejjLcKNXDrNTtk4em", "question": "What is the job of these elephants?", "choices": ["pull", "push", "carry", "jump"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000113194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367948, "question_id": "fQDUqTHpSWKq3RTpx6EWhB", "question": "What color is the towel in on the side of the bathtub?", "choices": ["blue", "burgundy", "sea green", "salmon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223958, "question_id": "fQiwoH6RNDZVw5A6J8NNZc", "question": "What is the green sign hung to alert drivers to?", "choices": ["accidents", "construction", "school zones", "streets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223958.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35086, "question_id": "fQtQHHRMAK5nEe7h494ssG", "question": "What is normally stored in the object the bear is raiding?", "choices": ["drinks", "money", "tools", "burgers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035086.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273977, "question_id": "fRErXQSPJL4pJY3DHDmvxu", "question": "What is this type of road junction called?", "choices": ["highway", "dirt road", "roundabout", "dead end"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90688, "question_id": "fRMEJvaWWct8LJyKdAkhUy", "question": "What would most likely be used in these cups to open a bag of frozen peas?", "choices": ["tweezers", "pen", "highlighter", "scissors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090688.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222876, "question_id": "fSQryngD84KVY8oHsmApFB", "question": "What is the bird doing?", "choices": ["feeding", "resting", "hiding", "singing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222876.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 423387, "question_id": "fSiRw648wb2pCSgcekzWRg", "question": "What execution device do the signs on the top resemble?", "choices": ["guillotine", "electric chair", "crucifix", "noose"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000423387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385055, "question_id": "fSxhp7iJhkTYWbQU5jqtyD", "question": "What is hanging up beside the window?", "choices": ["sign", "picture", "curtains", "rag"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385055.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552, "question_id": "fUUDpYXvKiV8puNicBtjtD", "question": "What is the main theme the tennis player is trying to display with her outfit?", "choices": ["black", "grey", "white", "pink"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000000552.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451209, "question_id": "fV37KcPHvjyh6sH3Ve9gUk", "question": "What item seen here will alert this man if there is a fire?", "choices": ["nothing", "thermostat", "smoke alarm", "spouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80198, "question_id": "fVC57rU4DEDrRErLYRZJbP", "question": "During which season was this bicycle being ridden?", "choices": ["winter", "spring", "fall", "summer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 502589, "question_id": "fVX2C46NjboQSzsWCGjt3W", "question": "What is the woman doing to the doll?", "choices": ["holding", "throwing", "staring", "curdling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000502589.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501908, "question_id": "fWGBPebTKfLchwP8FmhgzJ", "question": "The item with the pink handle will remove what?", "choices": ["stains", "water", "color", "lint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510682, "question_id": "fWHWSj2JvhDmUvqwCq5rKM", "question": "The cheese and vegetable toppings are placed on what type of food?", "choices": ["bagel", "sliced bread", "dough", "english muffin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251, "question_id": "fWK5QcukVJaUsJut6bJjT5", "question": "What part of the wave is the surfer mostly on?", "choices": ["face", "whitewater", "tube", "lip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000251.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9470, "question_id": "fWy3gweGoVNYMLQKwVyYQf", "question": "What type of machine is behind the giraffe?", "choices": ["change", "token", "coke", "picture booth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009470.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572562, "question_id": "fWyNb6effhjcwFpahc8TJY", "question": "When dogs are here who must be with them?", "choices": ["cat", "another dog", "horse", "person"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572562.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379752, "question_id": "fX4c7ThEumf3tEEwGbAbDq", "question": "What is missing from this object?", "choices": ["cord", "paper", "mouse", "key"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379752.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537462, "question_id": "fXWxVoZ95yYU6WAExC2MPk", "question": "What would be the closest style of the room?", "choices": ["classic", "natural", "retro", "modern"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537462.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 313293, "question_id": "fZRzLWAi4TzPMYwUmuQFFx", "question": "What do the signs with the crossed P mean?", "choices": ["no poaching", "no parking", "no crossing", "no panicking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000313293.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108257, "question_id": "fZT8GBnFHPXhqRknq5nWwj", "question": "What is the energy that lights up this photo?", "choices": ["sunlight", "coal", "batteries", "steam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259908, "question_id": "fZwiVa5vcAg75SJq7ipNQN", "question": "What kind of animal is being displayed on the train?", "choices": ["microscopic", "scaly", "winged", "furry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494071, "question_id": "fbSPaBqBi9tWqkEZLrie3Q", "question": "Which one of these items is probably used to feed him?", "choices": ["conveyer", "bucket", "bottle", "trough"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494071.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231513, "question_id": "fbsgtTdiTsiMGN3pxConsf", "question": "This Flag represents which country?", "choices": ["scotland", "norway", "uk", "ireland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231513.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269286, "question_id": "fdQFcQ26gj8yesct3UjgqG", "question": "What age person is this room for?", "choices": ["adult", "child", "elder", "young adult"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362009, "question_id": "fdcVNYbiA9wAX5dYsabzfg", "question": "What is this type of room called?", "choices": ["living room", "mudroom", "bathroom", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362009.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215352, "question_id": "feKDcoZsuDtB97YywtbeaK", "question": "This animal is known for having what feature?", "choices": ["stinger", "gills", "quills", "claws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000215352.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474044, "question_id": "feYdB3xZQmFFLUmADKbfSs", "question": "Which position will the red jacket most likely finish in?", "choices": ["second", "fourth", "first", "third"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 106600, "question_id": "ffmWP8ncpiJmoqUqdRawyW", "question": "What is most likely attached to the strings that the man is holding onto?", "choices": ["balloon", "canopy", "blimp", "kite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000106600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104942, "question_id": "fg3d9u2zyKHamkbkPHWc6a", "question": "What will happen to the cut piece if it's left on the table for too long?", "choices": ["disappear", "freeze", "turn brown", "ripen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104942.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245541, "question_id": "fg58FpfFfCKh8CX3u8VxXE", "question": "What is the brown circular object on the ground called?", "choices": ["manhole cover", "circle sign", "under cover", "woman cover"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000245541.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449386, "question_id": "fgHkmXofo9dhtKmEt2Agk2", "question": "Where is this bench located?", "choices": ["taxi stand", "grocery store", "bus stop", "train depot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449386.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379200, "question_id": "fhUzwFcDx9n5WKVxAhtXEC", "question": "What comic strip does the dog by the comb come from?", "choices": ["peanuts", "garfield", "family guy", "marvel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379200.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 242154, "question_id": "fhvGttWjdK2dMLRF9Cd7m8", "question": "Where would you have to go?", "choices": ["right", "reverse", "straight", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000242154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554135, "question_id": "fiDBb4pLwNwydskziEUvtW", "question": "What kind of bear is shown?", "choices": ["panda", "grizzly", "polar", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000554135.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169618, "question_id": "fkVzeFBzTZGsvXqXcADovw", "question": "The bus here is owned by what type of organization?", "choices": ["religious", "school", "basketball", "government"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169618.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321669, "question_id": "fmD3he6QEK3vzVHU7Zd3yr", "question": "Which color shirt does the surfer with the best stance have on?", "choices": ["black", "yellow", "blue", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321669.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485713, "question_id": "fmzfL2qTcDFA76JY6wLnUz", "question": "What type tree shown here might be likely to be nibbled by this animal?", "choices": ["oak", "palm", "pine", "clover"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485713.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177401, "question_id": "fnHaPCGx6ZiMRzBz5gFUjP", "question": "What is flying in the sky?", "choices": ["kite", "bird", "airplane", "helicopter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177401.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343209, "question_id": "fneGANdPwsNEs3BjR5vnqA", "question": "What type of phone is he using?", "choices": ["rotary", "cellular", "pay", "landline"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 455266, "question_id": "foNy2iKmzJJtfujoA3LmJQ", "question": "What is the baby dressed as?", "choices": ["monkey", "antelope", "cat", "robin hood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000455266.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438493, "question_id": "foteVMCaNr7j3bieC9Nrdd", "question": "The blanket is taking the place of a what?", "choices": ["fridge", "desk", "bed", "table"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438493.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169059, "question_id": "fpyRQYPgzuz8Ji3wqQJ7jR", "question": "Someone utilizing this space must clean themselves most fully how?", "choices": ["take bath", "toweling off", "showering", "flicking water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452402, "question_id": "frES6wj8m9KRw4u3SQs8c2", "question": "What are the dogs walking on?", "choices": ["field", "ocean", "cement", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452402.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490770, "question_id": "fsMmXzpxkQSdYQWQwYMTGS", "question": "What substance was the food item shown here boiled in?", "choices": ["oats", "milk", "water", "oil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490770.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353065, "question_id": "fsyfSCmw3JvRy3D8KfUVSo", "question": "How many people are on the bus if the number at the top of the bus is the number of current customers?", "choices": ["210", "175", "191", "85"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574927, "question_id": "ft4CRNDcP4We5Lh2LHRL7Z", "question": "When in use what does the plate inside the machine do?", "choices": ["heats food", "rotate", "vibrates", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000574927.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125221, "question_id": "ft4wmXE7GWT6LESKiiQ3rJ", "question": "What color would one navigate to to begin deciphering wii?", "choices": ["green", "red", "blue", "teal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396641, "question_id": "fx5xVPDtC2BfT8GCL44y76", "question": "What type of architecture is shown?", "choices": ["victorian", "futuristic", "historic", "modern"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200900, "question_id": "fxZofrAYxMdRXMwQBTSW3v", "question": "What action is the man taking?", "choices": ["skidding", "descending", "ascending", "rolling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200900.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532150, "question_id": "fyY9uXf7YtmdCmdmgnzxvw", "question": "Why would someone visit this location?", "choices": ["drink", "shop", "sleep", "exercise"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000532150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201586, "question_id": "fz4DrLMkQRjsUq2v4onGVC", "question": "What type of weather is most likely to cancel trips in this vehicle?", "choices": ["warm breeze", "rain", "clouds", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201586.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102477, "question_id": "fzRkCT7UEFBfQLJc6bz2iB", "question": "What type of construction is used for the door to the bathroom?", "choices": ["fiberglass", "solid core", "hollow core", "solid wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102477.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354872, "question_id": "fzRnWVsNRoh9jAVmyfbsmM", "question": "What is near the green car?", "choices": ["frog", "house", "fire hydrant", "trampoline"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354872.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336184, "question_id": "g29ki9jeGmysvcdsXdzryo", "question": "Why is the orange plate under the plant pot?", "choices": ["decoration", "catch water", "catch leaves", "stability"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517146, "question_id": "g2WjGBoP5KZwAW3osxdcne", "question": "The animal has what white part?", "choices": ["claw", "horn", "antler", "tusk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517146.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89358, "question_id": "g37PeLujHxXSaGY7Q2U6o8", "question": "If they began to move which animal is most likely to reach the camera first?", "choices": ["neither animal", "back animal", "front animal", "both animals"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000089358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579495, "question_id": "g37vRyTsTobaUra9nPZ49s", "question": "What is the stuffed animal on the bed?", "choices": ["bear", "dog", "cat", "elephant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579495.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456193, "question_id": "g3RyXVNXkHzgsuQ3T5zigQ", "question": "What are his pants made from?", "choices": ["denim", "silk", "fleece", "tericloth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456193.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478151, "question_id": "g3k6FM7VM7hkN3VZCBMAPf", "question": "What type of smartphone is present in the holder on top of the table?", "choices": ["iphone", "samsung galaxy", "blackberry", "android"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539523, "question_id": "g4AJFATwgRPguVoKRT8aCh", "question": "Which animal is dependent on the other here?", "choices": ["left", "both", "none", "right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539523.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201812, "question_id": "g4awBM7vtsWMUUSnMQEpsD", "question": "What is causing the trees to grow sideways towards the same direction?", "choices": ["sun", "grass", "cement sidewalk", "ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201812.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249903, "question_id": "g4cNZJKTf4ASnYNNf7KZ7M", "question": "The address of this building probably includes which state?", "choices": ["texas", "pennsylvania", "north dakota", "california"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000249903.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254023, "question_id": "g4zGcPgp9MvnwMK3aA3GAg", "question": "What is the red stuff on the plate?", "choices": ["beans", "beets", "peppers", "ketchup"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533622, "question_id": "g6AoNKUe5e3uT8AFdJ9WUz", "question": "Where is the most dangerous place to be in this image?", "choices": ["sky", "car", "street", "castle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533622.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443791, "question_id": "g7LV7Dk4dDBEjukXq4BPVZ", "question": "In which country is this aircraft hanger located?", "choices": ["canada", "mexico", "spain", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443791.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394427, "question_id": "g85W2PvmS7XGehoTPvKLZD", "question": "What can the owner do on this page?", "choices": ["check messages", "write blog", "play game", "take class"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000394427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115801, "question_id": "g8yBSgL7zgTcXZqCpbH3vi", "question": "What might one put inside the small rectangular appliance sitting on the countertop?", "choices": ["bread", "french toast", "pudding", "milk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115801.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176786, "question_id": "g94MmYZdoZruCdsdgnVVxh", "question": "Flower vases are mainly used for what?", "choices": ["none", "look", "aroma", "decorate"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000176786.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117126, "question_id": "g9S7asD9hnrNRNMAmUAuY8", "question": "How many mammal species are seen here besides the person?", "choices": ["one", "four", "two", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000117126.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137569, "question_id": "g9YpTT7fxtEhgLZkA2kkM8", "question": "What does the nose of the face on the bear's shirt resemble?", "choices": ["licorice", "button", "carrot", "mouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137569.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87300, "question_id": "gB9yKm6otZdXncbEwJYMjL", "question": "What type of architecture is shown?", "choices": ["antique", "greek", "modern", "victorian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087300.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529818, "question_id": "gCHVwt9sLAEyQYygZKQZty", "question": "What is a famous brand of the item that is in the box on top of the toilet?", "choices": ["energizer", "kleenex", "coca cola", "swiffer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529818.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349484, "question_id": "gDH8Q5QGVuQXzzfprZjbLq", "question": "What kind of pie is this?", "choices": ["apple", "chery", "strawberry", "pumkin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349484.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249880, "question_id": "gDoZhDkNo39KWkSssF5iDd", "question": "The air temperature surrounding the bear tearing down the tree is what?", "choices": ["cold", "hot", "mild", "warm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000249880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356900, "question_id": "gEZGik2qTXtNDxRUJVLR4Q", "question": "Which kind of room can you find the following?", "choices": ["kitchen", "bedroom", "washroom", "living rom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356900.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507297, "question_id": "gEv7QSpTYUK9tVnKxVRVGE", "question": "The light colored collar worn by the animal here is what type?", "choices": ["name", "jeweled", "flea", "silver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507297.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372304, "question_id": "gFGoTR4qEHhcCrfins9KZK", "question": "Dark condition is due to the absence of?", "choices": ["neutrons", "electrons", "photons", "protons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109885, "question_id": "gFL5Hh7SJdUJt8y9NWA8ZM", "question": "What is behind the person?", "choices": ["waves", "whale", "zombie", "eagle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109885.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145948, "question_id": "gFTDgiheXH8ryEJm8TDceN", "question": "What is the couple doing?", "choices": ["fighting", "taking selfie", "dancing", "getting married"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262881, "question_id": "gGAWq8TZFL3e8MVxEBQMov", "question": "What is this aircraft doing?", "choices": ["taking off/landing", "dogfighting", "flying", "undergoing maintenance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112046, "question_id": "gGZYPDKM7Ged9RqDzSEvm4", "question": "In what type building are these beds?", "choices": ["motel", "zoo", "office", "nap room"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519771, "question_id": "gHcMt9Liu7s3Qet5hBm6Mz", "question": "What number is closest to what the shorter hand pointing to?", "choices": ["32", "five", "190", "180"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519771.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336412, "question_id": "gHnnVLGTEdQFS45LWXiQ5f", "question": "What body part are these used to clean?", "choices": ["hair", "knees", "hands", "teeth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336412.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467094, "question_id": "gJKWrWzESx7xLhyTSKYsDp", "question": "This defunct airline was based in what city?", "choices": ["east berlin", "frankfort", "bern", "prague"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467094.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505466, "question_id": "gKdjLMkDLcXjdmr2PAukGy", "question": "What color is the building on the left?", "choices": ["black", "red", "blue", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569473, "question_id": "gLuSUJmytRXQY8jaDFA7s7", "question": "What are the boys called?", "choices": ["teenager", "adult", "elderly", "babies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569473.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19527, "question_id": "gM7ciaTaMddUJJiyZGHrbd", "question": "What is the girl most likely doing to the frisbee?", "choices": ["placing", "throwing", "catching", "spinning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019527.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389929, "question_id": "gPNM93gGDYtgHD2MsaZiYv", "question": "What does the bear's fur feel like?", "choices": ["dry", "wet", "spiky", "silky"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389929.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393747, "question_id": "gPdzEVzTYZ3cHPRw5pCSQD", "question": "What kind of clothing is the woman wearing?", "choices": ["tankini", "board shorts", "bikini", "wetsuit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393747.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389917, "question_id": "gQxeJWAyzggfmnUepMXW95", "question": "The man is wearing what kind of outfit most likely?", "choices": ["gym", "swim trunks", "wet suit", "t-shirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389917.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212120, "question_id": "gR4BbA2jJd2rAsHcMMXadg", "question": "What is required for this activity?", "choices": ["rain", "ice", "snow", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000212120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442265, "question_id": "gRAkZb3qVhcc3S2MwiCbhQ", "question": "Which one of these parking times would be acceptable here?", "choices": ["5 hours", "4 hours", "1 hour", "3 hours"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183281, "question_id": "gT3YEoz2KppNt5PcUKkRG3", "question": "Who plays this same sport?", "choices": ["ashleigh barty", "jennie finch", "billy barty", "flora finch"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000183281.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80461, "question_id": "gTUWxVxrGWn9Cz6PNGtaY5", "question": "Where does this airline rank size wise among North American airlines?", "choices": ["14th", "35th", "10th", "8th"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000080461.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173794, "question_id": "gUGp6egkS4i8QSCvdMyNfm", "question": "What kind of activity is taking place on top of the newspaper?", "choices": ["surgery", "reading", "food prep", "crafts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452289, "question_id": "gUt5L56NRdjSdAF9xdbLc7", "question": "What is the most logical explanation for what the man is doing with the wood?", "choices": ["transporting", "paddling", "seating people", "fishing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505030, "question_id": "gVSFfKxmEGSja3UbTrhwMW", "question": "Which one of these would be useful in this area?", "choices": ["night stand", "toilet paper", "paint brush", "tools"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000505030.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418452, "question_id": "gVTabiLYAhPD9shBn4sg9w", "question": "What might the person highest on the knoll here await?", "choices": ["phone signal", "wind", "date", "ride"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450708, "question_id": "gVrLz5ddXsyhS3MgHE6RPj", "question": "What are these elephants doing?", "choices": ["falling down", "cuddling", "fighting", "crying"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000450708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409154, "question_id": "gVrNRpLQxy3wkGJdjRHZFV", "question": "Club is used in which sport?", "choices": ["baseball", "cricket", "golf", "tennis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87312, "question_id": "gWNJ5RPKEApPexo6DLborE", "question": "What powers the vehicle shown here?", "choices": ["electric", "human", "gas", "solar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087312.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267755, "question_id": "gWWCU2ZNtzgNTtN6pDWG65", "question": "What part of the toilet is this cat standing on?", "choices": ["flush", "lift chain", "seat", "tank"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267755.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 128502, "question_id": "gXFX9uYWKZ5GF5TXEpPEJC", "question": "What water sport is the man attempting here?", "choices": ["water hockey", "sea walking", "free diving", "kite surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000128502.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483602, "question_id": "gYd6x6S5FzP2xCCYEcnrDF", "question": "What are the animals near?", "choices": ["cars", "trees", "benches", "cardboard boxes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483602.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32216, "question_id": "gYpAtGfKpo5acZ93cHC7En", "question": "What could be said about the positioning of the zebras?", "choices": ["parallel", "abnormal", "forced", "symmetrical"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000032216.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 327762, "question_id": "gYtCvvm7VLMz75L6HcXPYs", "question": "In what setting is this man?", "choices": ["school", "home", "library", "commercial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000327762.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413589, "question_id": "gZHxR8aANjUxdPkXd2eVwX", "question": "What kind of phone is this?", "choices": ["sony", "nokia", "samsung", "blackberry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413589.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357720, "question_id": "gZz5FGgYN5ZBJhLrxsZjgQ", "question": "What country might these donuts be in?", "choices": ["belgium", "germany", "united states", "mexico"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357720.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 489561, "question_id": "gaDBfbCZdz8VWpBVtJSn5a", "question": "The advertised railway is from what decade?", "choices": ["1820's", "1930's", "1880's", "1980's"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000489561.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424762, "question_id": "gac7iuXAcqAJSfJgCoexhK", "question": "What part of these animals are fully visible?", "choices": ["tail", "lips", "eyes", "claws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424762.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459351, "question_id": "gagcSt8tY7wiHpoHfW88kB", "question": "What term is used to refer to these kinds of animals?", "choices": ["hounds", "kitties", "foal", "cubs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000459351.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168853, "question_id": "gbcTs3P6wxVjwb54WSUKVR", "question": "What wrestler's first initial spelled out is the same as the word on the top of the bus?", "choices": ["c.w. anderson", "s.d. jones", "l.a. park", "cm punk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577814, "question_id": "gbtj6Jvj6uEFQyjtwQbMj6", "question": "In what position is the sun in relation to the dog?", "choices": ["bottom", "right", "top", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577814.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511987, "question_id": "gctAjzEwyryuQFPrJ4FPtv", "question": "What does this animal have an abundance of?", "choices": ["feathers", "antenna", "wool", "fur"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511987.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158817, "question_id": "gdBBjZ8S8QzANtGG4sqdoX", "question": "How many people is a bed this size meant for?", "choices": ["eight", "one", "seven", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158817.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204901, "question_id": "gdLuCuny85w6wR9TTH6PQ5", "question": "What is unnecessary here?", "choices": ["utensils", "table", "hands", "shirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204901.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577151, "question_id": "gfV82z7ANwojo7b93cASvq", "question": "What is this type of sign called?", "choices": ["historical", "promotional", "brand", "traffic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 312022, "question_id": "gfZJvLuN9tLvHjsFMdDePc", "question": "What kind of lens is used in mirror?", "choices": ["mirator", "concave", "reflective", "convex"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000312022.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22895, "question_id": "gfpurHZfppReGGBvKmce4T", "question": "How is the man getting around in the water?", "choices": ["by kite", "by plane", "by boat", "by horse"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000022895.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93897, "question_id": "ghBaHpeSAvRXtpcW9L6cKq", "question": "Why does this man have his hand up?", "choices": ["gesture", "break fall", "measure", "wave"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000093897.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346483, "question_id": "ghgFzU3LXouHmUzrsN8tZi", "question": "What design style is used to trim the exterior of the homes in the background?", "choices": ["southeast asian", "bauhaus", "brutalist", "gothic"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000346483.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548782, "question_id": "gi2JyUK8Rr5n4ctJd7whiv", "question": "Why is the train so small?", "choices": ["for midgets", "for children", "mistake", "not real"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548782.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473528, "question_id": "gi5s9YtSeiMjrVaSHSubVj", "question": "What activity is the dog doing?", "choices": ["walking", "running", "sleeping", "jumping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473528.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173660, "question_id": "gjQrsJEK5izqDX8NGJ6sky", "question": "What is used as a base topping for this dish?", "choices": ["ketchup", "tomato paste", "mayonnaise", "milk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159079, "question_id": "gjhDRGfBrJCe9X6UxvaER3", "question": "What fuel source is used by the person cooking here?", "choices": ["electric", "microwave only", "coal", "natural gas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356497, "question_id": "gjyFfk8oEEjQ4XVuxRfWsZ", "question": "Which company might one use instead of the yellow truck?", "choices": ["kohl's", "maybelline", "enterprise", "godiva"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356497.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435256, "question_id": "gkbVaKvPVg5VYNJARPNcFX", "question": "What are these creatures able to do with just their hindlegs?", "choices": ["stretch", "run", "kick", "stand up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435256.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 426515, "question_id": "gmsXMs3xSKeXgQFr2EyFCU", "question": "What is the driest part of the elephants?", "choices": ["trunk", "feet", "forehead", "back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000426515.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 72100, "question_id": "gnNdpw9yjT9JVFgzA2BcuW", "question": "What plant besides grains potatoes and tomatoes produced to make this meal?", "choices": ["marzipan", "beets", "corn", "cucumbers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000072100.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223329, "question_id": "gnnh4QYCdAewSuaRVK7cLq", "question": "What is hanging above the fireplace?", "choices": ["pictures", "mirror", "shades", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118054, "question_id": "gntoGsLYoSoLYCGikxVQHB", "question": "What number do you get when you add the first two numbers in the four digit sequence on any of the tags?", "choices": ["eight", "nine", "four", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309691, "question_id": "goqHacNKxyNXf3fyL6RKvF", "question": "What will these animals likely do next?", "choices": ["sit down", "walk", "drink", "stand up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277733, "question_id": "gpU4JZVkZxvDPtGhF28zAD", "question": "What character is a similar animal as this one?", "choices": ["garfield", "sonic", "snoopy", "crash bandicoot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277733.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298958, "question_id": "gq2UwwgFN4YYrV6cVeRSKj", "question": "What can be used to describe the blue substance on the pipes?", "choices": ["vandalism", "non-existent", "decorative", "adhesive"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298958.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299080, "question_id": "gqfp7n6qU7d6rFoKUDh52V", "question": "What is the name of the club?", "choices": ["golf", "trophy", "badger", "drinking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299080.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71640, "question_id": "gryS7kGXwNteGcC7e9UFLc", "question": "What continent is the country on the Chiquita label on?", "choices": ["europe", "asia", "south america", "north america"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395674, "question_id": "gsWT9FENJjqizNr9VE3Ce5", "question": "Where is this clock located at in SYDNEY?", "choices": ["post office", "bank", "court house", "university"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395674.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347364, "question_id": "gsWy5Po27f82iLfNHxHFWR", "question": "How has this food been portioned?", "choices": ["shreds", "slices", "scoops", "cubes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386837, "question_id": "gshaA4vF9PjBHMXdUkYYWQ", "question": "Which giraffe could outrun the other?", "choices": ["taller", "none", "either", "shorter"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000386837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354999, "question_id": "gt8TA39uGdRs93kDoS9Yzr", "question": "What is good tire pressure for van?", "choices": ["20-25 psi", "20-25 psi", "40-45 psi", "30-35 psi"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000354999.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276249, "question_id": "gu78pPLykar9vY9J2TZfzY", "question": "Where can the zebras hide?", "choices": ["cave", "grass", "trees", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192673, "question_id": "guTFPPzjiadXfQoDLWPbUG", "question": "What sign would you press to increase the sound level on your TV using this device?", "choices": ["plus", "greater than", "down arrow", "minus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192673.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401039, "question_id": "gvXd6duZ96wSwT7AZb2ioH", "question": "How is the smaller animal related to the larger one?", "choices": ["stranger", "calf", "sire", "dam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401039.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570653, "question_id": "gvt32rkd7fFer7sS2BHB7W", "question": "What were the first surfboards originally made of?", "choices": ["wood", "clay", "straw", "bamboo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167846, "question_id": "gwJsUpyxFugSqhxuJxb8m2", "question": "What is held in one of the hands of the photographer taking this pic?", "choices": ["soda", "nothing", "leash", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000167846.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120707, "question_id": "gwKf353T2uFjh3UNBd7rmS", "question": "What religion did the artist mentioned practice?", "choices": ["episcopalian", "mormon", "methodist", "quaker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374179, "question_id": "gwLdSLG6dAodWhu5hQHRTm", "question": "What kind of activity can be performed here?", "choices": ["phone call", "payment", "pictures", "shopping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374179.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77666, "question_id": "gxcCgMbKqxppLn8EYN6ZvC", "question": "What type of surface is this food located on?", "choices": ["lap", "stove", "counter", "table"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000077666.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83171, "question_id": "gxdkCFtzdGoLasseRttigB", "question": "Where is the clock located?", "choices": ["farm", "store", "hotel", "motel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083171.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454640, "question_id": "gxzKXtc8mzFxfLkCJMom99", "question": "What does a person usually do in this room?", "choices": ["exercise", "eat", "wash", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129446, "question_id": "gzZqNNBfruGt6N5zcU9x94", "question": "What type of enclosure is being used?", "choices": ["fence", "cage", "barn", "gate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129446.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456447, "question_id": "h2TqNRyCM5JEro8zDQdvXg", "question": "What is wrapped around the branch?", "choices": ["human arms", "elephant trunk", "monkey paw", "lemur tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145968, "question_id": "h2fMUpAyzRwY6Dm2ArSy3s", "question": "What is this boat's name inspired by?", "choices": ["math", "literature", "history", "chemistry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272106, "question_id": "h3SvwJVXqh7se5r8vHQuPL", "question": "What surface are these bottles located on?", "choices": ["chair", "table", "bench", "ground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272106.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461994, "question_id": "h44AyjDQWsdsKw2TnRW2X3", "question": "What was moving when the picture was taken that caused blur?", "choices": ["weather", "clouds", "elephant", "vehicle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461994.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476395, "question_id": "h4SBfjRw8q6LFWu9fJiGrp", "question": "This type of obstacle was created for what?", "choices": ["water", "air", "tricks", "speed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476395.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257299, "question_id": "h4YE6JDHAbmAHMVDnBcdHn", "question": "Why is the vase red?", "choices": ["tinted water", "tinted glass", "red flower", "tinted window"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000257299.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543063, "question_id": "h4kQuXibsjbBox2eU48Ag7", "question": "What liquid is currently in the most clear glassware here?", "choices": ["cola", "water", "dyed soda", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432911, "question_id": "h5JuWqEdxu6C8CyXWgvSwA", "question": "What tennis move is the man doing?", "choices": ["backhand", "serving", "hitting", "missing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432911.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493160, "question_id": "h5LwFgGWfvc8joMnZxzD7k", "question": "What is the profession of the person who makes this arrangement?", "choices": ["painter", "electrician", "dentist", "florist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493160.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56779, "question_id": "h8JdVgWMEbZHsqMHzqdvkA", "question": "What is the man walking across?", "choices": ["asphalt", "grass", "wood", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056779.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533626, "question_id": "h96Yep5T8m55x5DMioB6CK", "question": "The person here just came from where?", "choices": ["car", "car", "inland", "ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488091, "question_id": "h9PvQKjsfingzfkn5qzyuu", "question": "What is the paper here used for?", "choices": ["writing", "cleaning", "reading", "wrapping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488091.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483789, "question_id": "h9qDFeWckkCZ92zysBbAtV", "question": "The group of chimneys present on the roof of the brick building indicates the building is in which country?", "choices": ["australia", "canada", "united kingdom", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483789.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316921, "question_id": "hADKYpX2suhqdbocJeYLSe", "question": "What is the zebra standing on?", "choices": ["ice", "water", "grass", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316921.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125276, "question_id": "hAmbxqM5ydCnBuufVGqAYR", "question": "What type of transportation is available?", "choices": ["water", "rail", "road", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 48480, "question_id": "hBD5kgUNuda3QXoHrZsKbR", "question": "The color that least scattered by the atmospheric particles so used for warning is?", "choices": ["yellow", "blue", "green", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000048480.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342481, "question_id": "hCVvHMHwC4AnRScBgM6uz8", "question": "What is this dog learning to do?", "choices": ["swim", "roll", "fetch", "speak"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342481.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130253, "question_id": "hDTPuPE8bxUv2zT8jTLaA2", "question": "People in this area are likely to speak what language besides English?", "choices": ["portuguese", "greek", "dutch", "french"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130253.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84414, "question_id": "hETp8oRBbsk8kKDDRNKekA", "question": "What age groups are usually transported in this vehicle?", "choices": ["20-30", "35-50", "5-17", "50-60k"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212, "question_id": "hEWsgzkdJuTsbWqPDd362a", "question": "What season of the year is it?", "choices": ["summer", "spring", "autumn", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000212.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102874, "question_id": "hFMbU8pinsTsTQVf6Rp5wr", "question": "How could this area be described best?", "choices": ["forest", "taiga", "savanna", "swamp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102874.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204016, "question_id": "hGJXbA8U9AaHHhVKiXQD5i", "question": "What type of language is above the toilet?", "choices": ["german", "spanish", "french", "english"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204016.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93777, "question_id": "hGYhCXmTAp5ihf89LYGnB2", "question": "What type of flight would this airplane normally be booked for?", "choices": ["supply", "training", "private", "public"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417731, "question_id": "hJbhXXp3qPRCBXYWi5jnLT", "question": "United airlines routed across how many continents?", "choices": ["six", "two", "four", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417731.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391744, "question_id": "hJq2wx6vjMSMvmd65bqAky", "question": "How many types of four-legged mammals are here?", "choices": ["zero", "one", "three", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391744.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109660, "question_id": "hK2fWKaZ4g9ZwZXiUkw7Wz", "question": "Which NHL team is on the mousepad?", "choices": ["flames", "rangers", "jets", "hurricanes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95642, "question_id": "hLVsovFrgmQW98JYqmTGoi", "question": "What color is the handle of my brush?", "choices": ["yellow", "blue", "green", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095642.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381489, "question_id": "hLjCujHWfC8L74v7zKZb6p", "question": "What clothing item is concealed near this man?", "choices": ["neck tie", "stripes", "brick bat", "hats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381489.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110813, "question_id": "hMLJrXkEp8gknKYLFvFQJ4", "question": "What kind of plane is shown?", "choices": ["passenger", "paper", "military", "cargo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467306, "question_id": "hNhv4c6UYQid2fGSZLHALs", "question": "What is lacking under this dog's face for these wrinkles to show up?", "choices": ["blood", "bones", "muscles", "fat"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000467306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283381, "question_id": "hQqGsh2gUNk8S2Mt5R2935", "question": "The photograph of these zebras was taken from where?", "choices": ["suv", "helicopter", "car", "train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283381.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154632, "question_id": "hRXnBjYwBjYFDmC6JVLYW3", "question": "These are too what to be used for their typical purpose?", "choices": ["shiny", "small", "colorful", "soft"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154632.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155798, "question_id": "hRgyemE3PVMZJGLT35jAGs", "question": "How many ears are there in total among all the animals?", "choices": ["six", "four", "eight", "seven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136545, "question_id": "hSNHeyuoAUei9uev4azxAA", "question": "Why is the bird close to a piece of pizza?", "choices": ["to investigate", "to share", "to eat", "to hide"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136545.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506594, "question_id": "hTVVG6JPd3sPWCHzoMRF5q", "question": "What type of board is being used?", "choices": ["ironing", "cutting", "surf", "construction"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506594.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524161, "question_id": "hTyNYNv8y3ogpyjE4iH7B7", "question": "What type of hat is the thrower wearing?", "choices": ["cowboy hat", "baseball cap", "newsboy cap", "fedora"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231510, "question_id": "hUNWkUMXayMZELeNJHkL5H", "question": "The apple juice is sold by what retailer?", "choices": ["walmart", "aldi", "costco", "cvs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231510.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153070, "question_id": "hVcnFRL37AHVRtWbG9DroB", "question": "What type of cereal is in the bowl?", "choices": ["grape-nuts", "cheerios", "frosted flakes", "trix"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84508, "question_id": "hWfm2yKFcso8jTSFsPRRPN", "question": "What function do the round parts serve?", "choices": ["decoration", "pest control", "massage", "screws"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000084508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117126, "question_id": "hWgbz7jA6qhCXmKMsPNHrw", "question": "The name of the state on her sweatshirt comes from what tribe?", "choices": ["cherokee", "seminole", "cree", "shoshone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000117126.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552853, "question_id": "hWpHAj4aRRSVPsVM7sw2rc", "question": "What is causing the reflection behind the young man?", "choices": ["holograms", "water", "glass", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138762, "question_id": "hX8NygZgzfVtj5f7DrLH5S", "question": "What type of trick is the person performing?", "choices": ["shifty", "grind", "critical", "360"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138762.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393307, "question_id": "hXB5V2A4pjuDyc3gmkAbZR", "question": "What natural predator might be found here?", "choices": ["tiger", "warthog", "wolf", "shark"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393307.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319069, "question_id": "hXEmvqLqd9bnxgQ66XTq5S", "question": "What type of material is the sink in the kitchen constructed from?", "choices": ["ceramic", "copper", "plastic", "stainless steel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 46955, "question_id": "hY3QywcYiRdvVpchwampoF", "question": "What meal will likely be eaten soon?", "choices": ["breakfast", "brunch", "dinner", "morning tea"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000046955.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347657, "question_id": "hYG5FVd96wkkMmibqnPDiQ", "question": "What can be said about this person's motion?", "choices": ["slow", "stopped", "fast", "backwards"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 375587, "question_id": "hZFGd5SsmaaTY96rLaURhz", "question": "The yellow container is meant to be used to place what inside?", "choices": ["ballots", "tickets", "trash", "donations"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000375587.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389377, "question_id": "hZa4zzntnhohZYVqfopED4", "question": "This dish would taste terrible to someone who hates what?", "choices": ["fruit", "seafood", "donuts", "cake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573768, "question_id": "hZpQkSANkVwJB2onf9UzSa", "question": "What is this umbrella used for?", "choices": ["disguise", "rain", "snow", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000573768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451225, "question_id": "hZptTATnyWwc3coiXktAPi", "question": "What is the dog chasing?", "choices": ["person", "animal", "frisbee", "rope toy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451225.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299989, "question_id": "ha8x5FQYVdVrKsJtHzMDCV", "question": "What can be said about the substance on the sign?", "choices": ["slimy", "hot", "evaporating", "frozen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299989.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577622, "question_id": "haHhyage5sUETvvqdrpiCP", "question": "The item attached to the sign can be used to vanquish what creature?", "choices": ["demon", "werewolf", "ghost", "vampire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577622.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135569, "question_id": "hagFtnjhoK7AejbgnZEuoX", "question": "This beverage company was founded by an immigrant from which country?", "choices": ["italy", "latvia", "indonesia", "sierra leone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135569.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124261, "question_id": "hanv4pZrbBmQMSifSe4yBP", "question": "Form which direction does the light enter this room?", "choices": ["center", "right", "left", "up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199023, "question_id": "hcAKB9D2f9LdvrY2fNuh8P", "question": "What is the bell shade and color?", "choices": ["dark metal", "light metal", "white wood", "brown wood"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000199023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133512, "question_id": "hcVnpG6jNHdkcHYs9g5mEu", "question": "Why is the pizza discolored in this image?", "choices": ["cellphone", "shade", "moon", "camera flash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133512.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308728, "question_id": "hcbGm6Qnx2LEmAGV3wJUPq", "question": "What might have been used on the right two besides a knife?", "choices": ["fork", "vegetable peeler", "spoon", "straw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308728.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37930, "question_id": "hfofbLrWZX7hw9z9CGDit4", "question": "What country was the company that makes the item in the can founded in?", "choices": ["nepal", "italy", "china", "spain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037930.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51293, "question_id": "hgC8zkhGCnBeCQJzNLuN5o", "question": "What is needed for this activity?", "choices": ["water", "sun", "sand", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051293.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170242, "question_id": "hgEzbm8HNwDRB5HYfLCveC", "question": "The white item in front of it helps with what?", "choices": ["paws", "fleas", "eyes", "teeth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170242.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145810, "question_id": "hgsQ6wAjWwMQa2re2aQYUU", "question": "Someone standing in front of the bench can look at what?", "choices": ["reflection", "puzzle", "fan", "paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145810.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35194, "question_id": "hi5xoeF8qBE3mULzgvk3jD", "question": "The dog here hopes for what?", "choices": ["nothing", "movie deal", "scraps", "money"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424795, "question_id": "hi6Mu5qidzXVBcxrurEuBs", "question": "What flavor is the filling?", "choices": ["hazelnut", "dark chocolate", "milk chocolate", "bavarian cream"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424795.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345424, "question_id": "hiohRbpU3tCrM7tBH3iBU5", "question": "What sports equipment is next to the dog?", "choices": ["tennis", "cricket", "ice hockey", "baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345424.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6769, "question_id": "hipKFnjnwC9qjQmgQADSSa", "question": "The car splashing the woman is from which continent?", "choices": ["asia", "america", "europe", "australia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 225110, "question_id": "hjVKPpC4D2FfwJ8PUuL4NW", "question": "The person on the bench is wearing an outfit that looks similar to the outfit of who?", "choices": ["fire fighter", "clown", "police officer", "monk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000225110.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514394, "question_id": "hjxhEkFYvDEUNfrxLXoASe", "question": "Which book would be the most difficult to read for someone on the left?", "choices": ["c book", "programming perl", "pro spring", "bottom book"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6161, "question_id": "hm3miGE2E4VMNTWvhVvt76", "question": "What kind of eating styles does these animals have?", "choices": ["vermivore", "carnivore", "omnivore", "herbivore"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243790, "question_id": "hnMHWh55uVc62faf78mDds", "question": "What word can be seen on the wall?", "choices": ["locate", "social", "here", "house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 101277, "question_id": "hnavRHDNXWvgBvUESfY5Rg", "question": "Why are they carrying an umbrella?", "choices": ["rain", "shade", "hurricane", "tornado"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000101277.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439405, "question_id": "hoNoa6TYePvmuza3XQYp2x", "question": "What is needed for this activity?", "choices": ["battery", "wind", "remote", "magic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439405.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358618, "question_id": "hp53e3kKzHS6yBjWfEHsRm", "question": "Is the surfer wearing?", "choices": ["or", "pink shorts", "blue", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358618.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387608, "question_id": "hr3NzV7AdkUqzWqiEmKjm7", "question": "How many people ride in the tandem version of this sport?", "choices": ["three", "two", "four", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292089, "question_id": "hsUPYvCpG83Kqg9dsVYkNd", "question": "What is the profession of this man?", "choices": ["doctor", "coach", "athlete", "florist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000292089.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115307, "question_id": "hsuCUXzAe8aYgm5uSuRjzU", "question": "What fixture is probably also in this room?", "choices": ["swing", "toilet", "bed", "oven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115307.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91367, "question_id": "htLPPovpxi96rVMvmcdwB2", "question": "What is this place?", "choices": ["laundry", "department store", "mortuary", "hotel room"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000091367.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285591, "question_id": "htMbBFxEpSAVS3vRiyqbcS", "question": "What is the player prepared to do?", "choices": ["dunk", "sprint", "dribble", "swing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285591.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162184, "question_id": "hupAkJww7ffFGLgqnC9Atb", "question": "What is special about the large blue numbers on the plane?", "choices": ["palindromic number", "infinite number", "even number", "negative number"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000162184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173765, "question_id": "hxVGi6VRhRjRxohWduEMzV", "question": "What type of surf is closest to the camera?", "choices": ["bodyboard", "fish", "hybrid", "longboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528249, "question_id": "hxuVbb9RwK8KSsWkvgDAWj", "question": "How many giraffes are touching each other?", "choices": ["five", "six", "four", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550327, "question_id": "hxxVjeyhpvCwRF6kdc4z5g", "question": "What was used to make the hanging holder?", "choices": ["hat", "sock", "old tie", "mitten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550327.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302598, "question_id": "hy5FABNfJ8z6aAwtkKmTkh", "question": "What does this animal fear?", "choices": ["birds", "meat", "vinegar", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302598.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433687, "question_id": "hyBviT6iBZknuwZQMwwbh7", "question": "Where will they ride the bikes?", "choices": ["broken", "boat", "land", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433687.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347260, "question_id": "hzdEKFVx4QjVmCMLDwzb6k", "question": "What is the state of the bus?", "choices": ["pristine", "full", "half full", "empty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347260.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299706, "question_id": "i2S5ifnXhKrJsT92Mkhdmq", "question": "What kind of message is the person holding up their hand indicating?", "choices": ["indifference", "nothing", "disapproval", "approval"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299706.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368433, "question_id": "i4aWW4v4YpvVLYBQCuuJ2t", "question": "What is the liquid form of the white stuff on the ground?", "choices": ["clouds", "oxygen", "water", "gas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 134156, "question_id": "i5UvwskDrDHsypzaZXTMmy", "question": "What country is this?", "choices": ["russia", "usa", "uk", "china"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000134156.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490806, "question_id": "i5oeiygz3Bo4NKxJMRqfgN", "question": "What is this person dressed for?", "choices": ["swimming", "business", "sleep", "exercise"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262032, "question_id": "i63heaMumKDcnvH5LCKQ33", "question": "What word visible here is also a descriptor for this thing?", "choices": ["none", "qwerty", "uiopa", "asdf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262032.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374354, "question_id": "i7PMo7sks3REw5pTApdhYq", "question": "Where are these animals located at?", "choices": ["barn", "zoo", "wilderness", "park"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84070, "question_id": "i7sqfGRQp24AUAjTfNdf5C", "question": "What can be paid for with this meter?", "choices": ["gum", "taxes", "parking", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3759, "question_id": "i99iPsZm729rh8tP5CGLRF", "question": "What mode of transportation is the red vehicle?", "choices": ["bus", "train", "truck", "van"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003759.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55892, "question_id": "i9hJjzJu7uRNUbxfZtXb2Z", "question": "The company in the foreground is based out of what California city?", "choices": ["san francisco", "los angeles", "fresno", "anaheim"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3878, "question_id": "iAEy8KBHLLngFCbSJV7rtG", "question": "What environment would make the monkey so relaxed around humans?", "choices": ["wild", "unnatural", "zoo", "natural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003878.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153213, "question_id": "iANecuqvfy7YQThXnJDrq3", "question": "What was the first name of the namesake of the first sign?", "choices": ["henry", "thomas", "robert", "jordan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153213.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508745, "question_id": "iAgxs2g7QdoYiRdHWqddwe", "question": "What material is on the ground?", "choices": ["hey", "grass", "tlle", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298813, "question_id": "iBm5qARd2MzqHsqSMHbY9x", "question": "What is helping the man surf forward?", "choices": ["sail", "outfit", "feet", "cords"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494590, "question_id": "iCJNEAA5iHGxBUQrWxi3t5", "question": "What are the zebras doing?", "choices": ["drinking", "eating", "hiding", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214410, "question_id": "iDXPXm4mgp4GDs4ABQDbXD", "question": "What type of material is the wall made out of?", "choices": ["concrete", "wood", "plaster", "bricks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214410.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199322, "question_id": "iDuknz53KrtrB3XwDttZTT", "question": "What is the name of the symbol on the surfers shirt?", "choices": ["yin-yang", "lgbq", "peace", "anarchy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199322.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567541, "question_id": "iDyQm7MAZPoTggEW5TGSCn", "question": "What caused the concrete to turn black?", "choices": ["garbage", "soot", "paint", "lichen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567541.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373507, "question_id": "iEogNLk6GFbNCM46uVdTG4", "question": "Why is he stopped?", "choices": ["is lost", "is hungry", "resting", "posing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000373507.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533970, "question_id": "iFZXamcpCKZ4MgcTXLFRTj", "question": "Which direction is not an option after stopping and looking at this sign?", "choices": ["forward", "right", "left", "u turn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533970.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259905, "question_id": "iFcmoAinFmDfxihAmvnKnu", "question": "What is visible in the water?", "choices": ["boats", "dolphins", "sharks", "dogs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259905.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295899, "question_id": "iGtxupVqkvhLGDUWd3qVck", "question": "What state is this photo from?", "choices": ["new york", "florida", "texas", "california"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000295899.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234069, "question_id": "iHLbb5vaeUfTMaYSxGdbDe", "question": "In what season is the woman holding the umbrella being photographed?", "choices": ["fall", "spring", "summer", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000234069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298056, "question_id": "iHjkX2fqmqfGMbo5u7KbM4", "question": "Who controls this frisbee?", "choices": ["photographer", "miss america", "mark cuban", "mortal enemy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251619, "question_id": "iJhGBsvbJxMzghaaM94cmZ", "question": "What does it say at the bottom of the red sign?", "choices": ["yield", "turn back", "all way", "crossing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251619.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376742, "question_id": "iKmJicF5KxrkGA6hbv6qLR", "question": "What type of animal is shown?", "choices": ["reptile", "domestic", "aquatic", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393208, "question_id": "iKvnsL6GYbQnpSSGAbBTFG", "question": "What number is on top of the hydrant?", "choices": ["922", "456", "971", "634"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292458, "question_id": "iMECVZ9NbtnwscDq584Rii", "question": "What type of vehicle is shown?", "choices": ["motorcycle", "motor scooter", "golf cart", "dirt bike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000292458.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83429, "question_id": "iPX8uLFdZtki2St8gBQNPo", "question": "Which way can a driver go after stopping at the sign?", "choices": ["go straight", "turn left", "make u-turn", "turn right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520465, "question_id": "iPmVxieWhoV2wcLrMi74Kp", "question": "The shell appears to be made from what grain?", "choices": ["corn", "rice", "wheat", "sorghum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520465.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440128, "question_id": "iQT74NrWzaWBoFBHCnVm2h", "question": "Who is the hut at the background built for?", "choices": ["no one", "woman", "horse", "plants"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35688, "question_id": "iQUwJ57htxmqg69nEsdw9v", "question": "What colour is the bean bag chair on the left?", "choices": ["blue", "green", "orange", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035688.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412909, "question_id": "iRJ5L9HRqXAapnKuWt3s8P", "question": "What is the vehicle called that the kid is riding?", "choices": ["wagon", "powerwheel", "scooter", "tricycle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412909.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467444, "question_id": "iRkXiW7hA8ka8GMs6YZS8z", "question": "Which country do you usually find the type of candy that is in this basket?", "choices": ["russia", "canada", "america", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467444.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190425, "question_id": "iVGrkuBmDEeR6SZys9VcHq", "question": "How many eyes are in the picture?", "choices": ["four", "three", "two", "zero"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501207, "question_id": "iXRxhePfFWMJS78Qaggvkm", "question": "Which one of this boy's senses is weak?", "choices": ["sight", "common", "taste", "hearing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501207.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89094, "question_id": "iYXWjPpEoJCKDuUiMuzFkF", "question": "Which larger animal is this creature related to?", "choices": ["hippo", "dog", "lion", "aardvark"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089094.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486574, "question_id": "iZ8GmsP4Ba3qUo8cvbQmeg", "question": "What are the potatoes shown here cooked in?", "choices": ["butter", "soda pop", "soy sauce", "hot oil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486574.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533205, "question_id": "iaXo4BndMedVnMVLPppmYX", "question": "What is he preparing to do?", "choices": ["hit opponent", "give up", "hit ball", "return serve"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533205.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410941, "question_id": "iaoiTDSJMEnWGQW85pLaWw", "question": "Who is this famous tennis player?", "choices": ["bjorn borg", "john mcenroe", "roger federer", "andre agassi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410941.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188776, "question_id": "ibVduG5R8xzoA3dVMzPDuP", "question": "What type of surface is the match being played on?", "choices": ["hard", "carpet", "grass", "clay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188776.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 266731, "question_id": "ibfpQkb279uDXoYkYxPQUB", "question": "What are the white items of clothing intended to cover?", "choices": ["hands", "elbows", "feet", "head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000266731.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559965, "question_id": "icAVm8ga6vSyS5nU9k2Der", "question": "What item is below the mirror?", "choices": ["toilet", "dryer", "washer", "sink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559965.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485713, "question_id": "icSGaXuRpP4ez2JyNfo9mr", "question": "If the giraffe keeps walking what will it run into?", "choices": ["elephant", "car", "house", "fence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485713.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 132852, "question_id": "icjrKDAGyZ7onW4XLDD96D", "question": "What kind of facility is this?", "choices": ["stadium", "museum", "aquarium", "rest stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000132852.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419358, "question_id": "idNVyA7vQ5LAdYaai87bsr", "question": "When resting here what feature is likely to draw most attention?", "choices": ["log bench", "cloud", "river", "ship"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000419358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17983, "question_id": "idubAcqtZEEpj6BeHdgWmu", "question": "Where is this sign located?", "choices": ["australia", "europe", "asia", "north america"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017983.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192604, "question_id": "ie2tgMqPRrRo5LvALoSAAz", "question": "This devise eliminates the need to use what to connect?", "choices": ["password", "cord", "screen", "account"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192604.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144181, "question_id": "ieHUQp4wHpFxNG6eynxg5b", "question": "What element should be added to improve this photograph?", "choices": ["shading", "umbrellas", "smile", "light"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000144181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52451, "question_id": "ieNXG8VCuWbvLuWBK2NjeB", "question": "What type of light bulb is used in the lamp above the bed?", "choices": ["led", "incandescent", "compact fluorescent", "halogen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052451.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147507, "question_id": "ifFmQW4R53Lk5bahLVTPh6", "question": "What is the first name of the athlete who wore these jerseys?", "choices": ["tom", "larry", "peter", "michael"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147507.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338450, "question_id": "ig4pxoDVzhxYma2FN5LKoU", "question": "Why are the bulbs shaped like flames?", "choices": ["economical", "tricking others", "mood", "cheaper"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000338450.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194887, "question_id": "ig6JZkqvca9LsfHQjKjCmT", "question": "What are the dogs getting on their paws?", "choices": ["snow", "juice", "mud", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000194887.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77387, "question_id": "ig6rotubFNcZGHPXaKAqMQ", "question": "What is wrapped around the controller?", "choices": ["cat's tail", "hand", "tape", "barbed wire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440933, "question_id": "igiKQG2VssLhjXdfdpe6bx", "question": "How many people are in this image?", "choices": ["three", "five", "one", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394485, "question_id": "ihzBoV98QjEEh4LC6HJx9X", "question": "What kind of transportation is shown?", "choices": ["road", "water", "air", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000394485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 479076, "question_id": "iiCog3WG37yBtBN3r3sLPT", "question": "In which city can this type of garbage bin with an LCD screen be found?", "choices": ["tokyo", "new york", "london", "sydney"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000479076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164970, "question_id": "iiHXAwYbk6AA2ubLQUZRvK", "question": "What is a white colored object that is near the skateboard?", "choices": ["snow", "lamb", "milk", "shoelaces"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164970.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548392, "question_id": "iij8fxAy2aPHKrsEGUt8Fa", "question": "What is touching the skateboard?", "choices": ["boot", "nose", "whiskers", "hands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170447, "question_id": "ijEki7vJ9UzvXHQ4EDpKDU", "question": "His hat would be perfect to wear on the holiday named after which saint?", "choices": ["claus", "valentine", "patrick", "bartholomew"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137827, "question_id": "ikLPx7jtVFCme3guDRJXXt", "question": "What would be the biggest threat for the human here?", "choices": ["tree collision", "sharks", "other humans", "wiping out"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137827.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40626, "question_id": "ikLS9J7nac9A8oGevMxNQJ", "question": "What pastime is forbidden here?", "choices": ["texting", "gaming", "smoking", "phoning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567500, "question_id": "ikrtiazfWxuMxm5V82Luad", "question": "What type of window is in the kitchen?", "choices": ["bay", "casement", "double paned", "circular"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567500.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501109, "question_id": "imwBords8xXmKagG38FwGZ", "question": "What is attached to the board?", "choices": ["cat", "comb", "handle", "wheels"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501109.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406203, "question_id": "imwzk6iWU6efYBw9u2DRKR", "question": "What is about to happen to the plants situated on the truck?", "choices": ["planted", "thrown away", "harvested", "mulched"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000406203.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357447, "question_id": "in8R4CwpEMzGikgXcbSpoM", "question": "In which area is the flush mechanism for the toilet?", "choices": ["outside", "toilet top", "hose", "over bin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389912, "question_id": "inAsYNUowend9pakMmDYK9", "question": "Which one of these continents has these type of birds?", "choices": ["europe", "asia", "africa", "north america"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389912.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252142, "question_id": "ioJJNag9F8THFbzmPZSDcR", "question": "What is one of this creatures most serious defense mechanisms?", "choices": ["tail", "ears", "smile", "claws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252142.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465794, "question_id": "ip9PTfgNWLHYAN72LVZDzM", "question": "What are they doing under the clock?", "choices": ["eating", "drinking", "posing", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302798, "question_id": "ipLNHv5qJj5HPNFRJNXZS6", "question": "What type of person would most likely use the blue toothbrush?", "choices": ["grandparent", "child", "denture wearer", "adult"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390441, "question_id": "ipje8WXsn4jagPikdaQxmy", "question": "The food consumed here is grown in what type of environment?", "choices": ["indoors", "tropical", "temperate", "desert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390441.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491057, "question_id": "iq95bYWJozwbC8EMpJbUvG", "question": "To which destination is the driver of the VW most likely headed?", "choices": ["ski lodge", "shopping mall", "mountains", "beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000491057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121544, "question_id": "iqs9xz68orUPibJxzPYNEQ", "question": "What made the vertically oriented gashes in this wood?", "choices": ["pizza cutter", "knife", "spoon", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121544.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162980, "question_id": "ir2Q2v43J6JbbjPZgivTZW", "question": "Why is the player wearing gloves?", "choices": ["warmth", "fashion", "grip", "health"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162980.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144943, "question_id": "itqEk8wAjkGfnWbRgQ6B3e", "question": "What type of animal is this?", "choices": ["domestic", "aquatic", "reptile", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000144943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 255591, "question_id": "iurXYqCMX6SnG2MfRrvSGv", "question": "What does the window protect the bird from?", "choices": ["cat", "heat", "humans", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000255591.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472685, "question_id": "ivUPXociD5DxGFrw86ZVak", "question": "The red and white pole on the rear of the hydrant is used to increase visibility in which type of weather?", "choices": ["thunderstorms", "snow", "rain", "tornadoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472685.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529543, "question_id": "ivhtvBRCe3fJRWo8UmSQ6D", "question": "What does the toilet have installed that is very rare to have?", "choices": ["sink", "lid", "plumbing", "seat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529543.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576917, "question_id": "iviXnShkAtfj8uQ5D2uyPS", "question": "The giraffe lying down under the standing giraffe is what species of giraffe?", "choices": ["reticulated", "thornicroft's", "south african", "masai"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000576917.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40353, "question_id": "iwUfD9FUGeGFDKX7tMhEXC", "question": "What charges this type of phone?", "choices": ["hamsters", "wind", "gas", "electricity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468641, "question_id": "ixrTksqu6L2FA4K864VwNG", "question": "How many of these should she eat if she is on a diet?", "choices": ["one", "zero", "two", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389853, "question_id": "ixtDGBwkv8nrmYaJ35ZDwm", "question": "The little girl in the picture is prepared for what?", "choices": ["game", "rain", "dinner", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384862, "question_id": "iy3Lp84GFvnfAGGS8uL6m9", "question": "The silver plane with the red stripes is constructed out of what material?", "choices": ["iron", "aluminum", "steel", "carbon fiber"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384862.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439212, "question_id": "izLpYgwfvp6PqmZ6vobEwj", "question": "What type person likely defaced this sign?", "choices": ["paleo dieter", "carnivore", "vegan", "omnivore"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439212.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 429459, "question_id": "izw96W92JuHqVe9aR9ZNeo", "question": "What type of area is shown?", "choices": ["desert", "rural", "country", "urban"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000429459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288563, "question_id": "j24WN8dZRmQhLRhyktWbgr", "question": "What haircut looks most like a horse's hair?", "choices": ["mullet", "bald", "buzzcut", "afro"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288563.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52900, "question_id": "j2X9bqrB5dxjHqteaYByqn", "question": "This beast is usually pictured chewing on what?", "choices": ["birds", "brains", "bone", "deer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052900.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520608, "question_id": "j2cG2Vd8gKrrZ5feZPdEH7", "question": "What is near the elephant?", "choices": ["lake", "man", "cow", "barrel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211187, "question_id": "j2hCVPjhSs9EajQtCq946m", "question": "What country is represented by the symbol on the back of the hat?", "choices": ["kazakhstan", "mexico", "iraq", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211187.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387612, "question_id": "j2pMPPxn6xpDfxMc9eM8cM", "question": "What kind of natural phenomena shares a color with the bird on the right?", "choices": ["clouds", "fire", "smoke", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387612.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376742, "question_id": "j2s5hByAP76Umpqj8Cx9Aa", "question": "Why is the zebra alone?", "choices": ["lost", "for sale", "hiding", "cameraman posed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199848, "question_id": "j3UE5jopNDLaKooavZWq45", "question": "The bedding sits on what kind of flooring?", "choices": ["concrete", "wood", "carpet", "drywall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199848.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37948, "question_id": "j4UPkDirS7kvnXLgzybzji", "question": "What relation might the smaller animal be to the bigger one?", "choices": ["grandpa", "child", "father", "uncle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295416, "question_id": "j4Zno7MtVB6HtqtiYQuGTb", "question": "This and streets near it are unusual in which way for a city street?", "choices": ["extra wide", "narrow", "dangerous", "hills"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000295416.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4454, "question_id": "j4e5EVMGquRT4K2jzAEtyi", "question": "What is being cut out of the paper?", "choices": ["articles", "celebrity pictures", "recipes", "coupons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004454.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289542, "question_id": "j9KYrRboUVFH7JczPNQw5N", "question": "What type of road area is under these signs?", "choices": ["bridge", "intersection", "hill", "ramp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83429, "question_id": "j9Wty4RvUzSxpL3JWkqAnE", "question": "What person invented the red sign?", "choices": ["sanchez", "mortimer", "jefferson", "eno"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471970, "question_id": "jA4Y8uE8gmNeVobH9iwwK3", "question": "The black spiral on the bottom of the board looks like the logo of what?", "choices": ["wwe", "american express", "ibm", "comedy central"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000471970.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353453, "question_id": "jARKbLP9zWfZHc5TALp8Lo", "question": "What direction is the bear looking?", "choices": ["up", "right", "down", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353453.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567291, "question_id": "jAbYaoFqHCysB3KWmBRZUG", "question": "What type of role play outfit is the woman wearing?", "choices": ["student", "dominatrix", "sub", "french maid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567291.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511035, "question_id": "jB8XXD6X4wv4NbcJgn2xWT", "question": "In which country would this type of clothing be most normal?", "choices": ["iceland", "united states", "scotland", "panama"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511035.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302445, "question_id": "jBo6YbyMMe2bZjnr2wEsGP", "question": "To which direction is the person looking to?", "choices": ["right", "up", "left", "down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302445.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214602, "question_id": "jC7xk66JFQ2QXNkcs8saeo", "question": "If he wants to travel in the opposite direction what should he use?", "choices": ["feet", "boat", "car", "bicycle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214602.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431253, "question_id": "jDQSqLjrdguuekMdVXC3kn", "question": "What is atop the board?", "choices": ["food", "dog", "alien", "person"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431253.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551471, "question_id": "jDdoAH94cuYimPU2FWsrKW", "question": "Which one of these forms of needlework is displayed here?", "choices": ["patchwork", "knitting", "needle lace", "crocheting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551471.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200944, "question_id": "jDiSV7u8SPQniwGW4ewm3f", "question": "The person is on top of what?", "choices": ["car", "wave", "chair", "box"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200944.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457073, "question_id": "jEEabgWRJtSSYpTLA3yaQM", "question": "What is the white spiky object behind the pitcher used to remove dirt from?", "choices": ["mound", "cleats", "base ball", "boots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457073.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525951, "question_id": "jFGmExF7V6aCLXzCmRxp5o", "question": "What is the giraffe looking at?", "choices": ["sky", "fence", "ground", "camera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525951.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 323318, "question_id": "jG3U9XDd2bBgj2kn8Li2fn", "question": "What are the red and blue upright poles called in slalom?", "choices": ["uprights", "gates", "sets", "bars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000323318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524111, "question_id": "jH3jgCN7S9y8URWeQeMX9x", "question": "What happened recently at this location?", "choices": ["raining", "eruption", "snowing", "hurricane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 248778, "question_id": "jHmNkmmsND3Zm5xMsLRcBU", "question": "Where is this kitchen located?", "choices": ["restaurant", "hospital", "home", "store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000248778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319498, "question_id": "jJdwP43uDQUcuRmuaTrcTF", "question": "What time is it?", "choices": ["11", "125", "330", "1216"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 247471, "question_id": "jJnTrjkJcEjMDu8bMQacRm", "question": "Which one of these animals might threaten her in this location?", "choices": ["mosquito", "arctic wolf", "scorpion", "polar bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000247471.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56101, "question_id": "jJp6wjYBeX6feVn6Sf3i3v", "question": "The headquarters of this airline is closest to what airport?", "choices": ["jfk", "dulles international", "chicago/o'hare", "dallas/fort worth"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000056101.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397297, "question_id": "jKPbeKCJgMMeD4bBR4vPRz", "question": "Where might the dog be?", "choices": ["forest", "beach", "yard", "field"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397297.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64887, "question_id": "jL9KAifdNgFg2i6Z5brAfQ", "question": "What is nearest to the cat on the floor?", "choices": ["shoes", "dog", "elephant", "television"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000064887.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285372, "question_id": "jMr42gqu6pPS4HFtU7GRdS", "question": "Who is the maker of the plane?", "choices": ["beechcraft", "boeing", "cessna", "airbus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285372.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35644, "question_id": "jNZ9r6kWs2FBjqAJoBakNE", "question": "Which animal can get nutrition from more leaves in this area?", "choices": ["mules", "zebra", "giraffe", "dogs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035644.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172819, "question_id": "jNiLbmVKjDhYQZjRYCkFRs", "question": "What is this baby trying to do?", "choices": ["cry", "drink", "brush", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172819.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476505, "question_id": "jPHUrUBfLaVgUEx69BGsUV", "question": "What country does this plane come from?", "choices": ["sweden", "italy", "russia", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515188, "question_id": "jPv7fvvWYQMPVghGTeR27Z", "question": "What type of plants are being grown in the planter?", "choices": ["trees", "vegetables", "flowers", "shrubs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515188.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45346, "question_id": "jQiYY9zupjULPjUyzeiDu3", "question": "The bean that makes the spread on his dessert is from what region of the world?", "choices": ["mesoamerica", "southern australia", "north africa", "central europe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566659, "question_id": "jR3P9nHJqhoZaBGX4nT6Hf", "question": "What is reaching for the item in the sky?", "choices": ["right hand", "left foot", "dog's nose", "cat's claw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566659.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22254, "question_id": "jR8ULhrkzYmWp5u4umfsJw", "question": "What quality is she trying to portray here?", "choices": ["wealth", "sexiness", "shyness", "intelligence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000022254.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372796, "question_id": "jRaE3RbdLtmTBQWBwHdjcj", "question": "This type of surfboard is made from what material?", "choices": ["fiberglass", "metal", "wood", "foam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372796.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265296, "question_id": "jSAAtvaFRuS5XTp2iKH5KG", "question": "What number is the small hand closest to?", "choices": ["two", "seven", "eight", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265296.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499160, "question_id": "jTYw3qDepX3Bxe7hUR8r29", "question": "The clock is at what time?", "choices": ["1200", "600", "330", "940"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499160.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83182, "question_id": "jUStc7hBqkdwrbZLmtbTFB", "question": "What kind of play equipment is the boy using?", "choices": ["swing set", "slide", "merry-go-round", "jungle gym"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083182.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117294, "question_id": "jUTuj6NGTrmyUKuijYRZMA", "question": "Where are these elephants located?", "choices": ["fair", "zoo", "circus", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000117294.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 174224, "question_id": "jVSbcz2kqy8hpUdwDWTk4e", "question": "What place has many locations that resemble this one?", "choices": ["hawaii", "siberia", "egypt", "nevada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000174224.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352326, "question_id": "jVnL9UpSuaxS3DSCAHDJ76", "question": "What show did the person seen on screen here star in on TV?", "choices": ["caddyshack", "scrooged", "letterman show", "westworld"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352326.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498095, "question_id": "jVs7kdVEJ9fJ9mvy6ua2Ha", "question": "What athlete has the same name as the name on the sign on the left?", "choices": ["john amos", "peter bourke", "otis nixon", "ray winstone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498095.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397932, "question_id": "jWN8jWAbEiyi7HeQQPZKAg", "question": "Approximately what time will it be in 12 hours?", "choices": ["845", "930", "1220", "615"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519141, "question_id": "jXKhxP3iARhZEJWEvoecAx", "question": "What type of range can be seen?", "choices": ["shooting", "mountain", "cowboy", "gun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56215, "question_id": "jYLgx2YcZ4pDNTX9xhcXbs", "question": "Her accessories make it easy for her to leave the house without doing what?", "choices": ["her nails", "her hair", "applying lotion", "shaving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056215.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 284438, "question_id": "jYQAye22vyx7ZSmGubdoVP", "question": "What is the area on the street with white lines permitting?", "choices": ["turning", "crossing", "speeding", "parking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000284438.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578938, "question_id": "jYaRi6eyKKvy63bSy3oqAs", "question": "What would traditionally be served in the cups?", "choices": ["tea", "milk", "water", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578938.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147364, "question_id": "jZMq2Dnh53Z2dEPrhm3RJP", "question": "This man most resembles what Final Fantasy character?", "choices": ["professor hojo", "kefka", "cloud", "tifa"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000147364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123895, "question_id": "jZNapddV52MKEZLVzKqNzu", "question": "What did the object next to the tall skinny tree used to be?", "choices": ["rock", "fountain", "tree", "sculpture"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123895.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298053, "question_id": "jasD37fKDAnLbdcdDbJykR", "question": "What was previously done to the items that are sprinkled on the middle donut in the bottom row?", "choices": ["pureed", "fried", "sauteed", "chopped"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541946, "question_id": "jbJjhg2N6vPtJXacM8Jsjk", "question": "What is this bear ready to do?", "choices": ["run", "roll", "sit", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562459, "question_id": "jbjJ7wWs4F2ucHRJ8ZQ7Uw", "question": "Which kingdom of life forms is most abundant here?", "choices": ["plant", "phytophera", "none", "animal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514206, "question_id": "jbwWUPmguKnozQjdEj4sxz", "question": "What is the paper here used for?", "choices": ["cleaning", "wrapping", "reading", "writing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514206.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507571, "question_id": "jd9JD6J9T7WrSWXgkWsqgV", "question": "Why do they have their heads covered?", "choices": ["uniform", "costume", "protection", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474445, "question_id": "jdVFmTgijWktLnty73ctX3", "question": "Which one of these is a method of obtaining this food?", "choices": ["delivery", "voting", "adoption", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474445.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354510, "question_id": "jdqfkDyrWhxzduCgj9rygP", "question": "What should the car on the right do?", "choices": ["stop", "wait", "go", "turn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354510.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488393, "question_id": "jdzbk7ES2grF2XGGhCfXpi", "question": "What shape is the button that is likely to pause the program being watched?", "choices": ["round", "triangular", "square", "oval"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488393.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417543, "question_id": "jf2dYNQmhn6FFe6rGYRq8W", "question": "Where are these devices on the wall exclusively found?", "choices": ["camp", "men's toilets", "airports", "art installations"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417543.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169059, "question_id": "jfBZz9EGsawEu9C3TuUEjb", "question": "What is near the person?", "choices": ["cow", "refrigerator", "door", "flamingo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157082, "question_id": "jfPRQVVJfKHrSgbGBrxQ5f", "question": "What must be in the air for the phenomenon visible here to occur?", "choices": ["darkness", "tornado", "moisture", "hail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157082.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326141, "question_id": "jg4v9Zg4mThphvB8ujWxqe", "question": "These footwear are more comfortable than which of the following?", "choices": ["flip flops", "heels", "sandals", "socks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309709, "question_id": "jgiswSTHEjDgWnBmJL89UY", "question": "What is keeping the giraffes from escaping?", "choices": ["water", "trees", "boulders", "fences"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176940, "question_id": "jhAJ2riZ3FtRqgGv7oHs2h", "question": "The counter the apple is resting on is what type of surface?", "choices": ["marble", "ceramic", "plastic", "metal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176940.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110249, "question_id": "jhEgfa5QfwadtVuGG8eQHg", "question": "What is the horizontal handle for?", "choices": ["takes pictures", "removing bowl", "releasing chemicals", "flushing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257571, "question_id": "jhUYPJYuCsRxoF3dGpjCSL", "question": "What are the people waist-deep in?", "choices": ["grass", "rocks", "sand", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 300463, "question_id": "jisGvyrwg9ZMELvg3jhYia", "question": "Which one of these animals is in the same phylum as the animal on the wall?", "choices": ["snail", "salmon", "butterfly", "deer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000300463.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 327482, "question_id": "jjWTGzQwSvfxBF47XD4cPt", "question": "What is usually found in this kind of room?", "choices": ["bookcase", "toilet plunger", "refrigerator", "pool table"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000327482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234833, "question_id": "jmXMQ58fppdcgGkbCHdkLz", "question": "This baby animal is known as what?", "choices": ["pup", "calf", "cub", "kid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000234833.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558261, "question_id": "jnBNFaSEKzJLSFwDj8v588", "question": "Why is the bench next to the wall heater?", "choices": ["displaying it", "for sale", "drying paint", "warming people"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10373, "question_id": "jnwYsrruqGhSecuPwZMCdW", "question": "What is the job of this dog?", "choices": ["herd", "show", "search", "rescue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010373.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 238433, "question_id": "jnx6Rr4Fx5BjLxVRM5Ywcm", "question": "By which method is this parking meter powered?", "choices": ["solar", "wind", "battery", "gas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000238433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61876, "question_id": "jobHGtEMimdu5vTH2GgSif", "question": "What is the main reason for this booth?", "choices": ["rain cover", "bear", "calling someone", "heater"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061876.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527230, "question_id": "jom5EdrHKYk4PcuycN3ZRK", "question": "What is the square item called?", "choices": ["oven", "cage", "box", "pan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 150084, "question_id": "jp5VkjCwvrpVCj5sQuDLHP", "question": "What famous mountain is seen here?", "choices": ["everest", "st. helens", "kilimanjaro", "mount fuji"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000150084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394485, "question_id": "jq28okWtfNeTuNQuULgKZ6", "question": "What type of area is the double-decker bus traveling nearby?", "choices": ["industrial", "residential", "commercial", "agricultural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000394485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36538, "question_id": "jq2bupbU97VqTvU773DS4z", "question": "What famous movement was founded in this neighborhood?", "choices": ["qanon", "me too", "hippie", "abolitionist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142296, "question_id": "jqHPG6KJEkw9YwrLhzJcAD", "question": "What object is the flowering plant hanging from above the green sign?", "choices": ["street sign", "lamp pole", "telephone pole", "power pole"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000142296.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175265, "question_id": "jrMsUW4fYKt2xeBYVKbCga", "question": "What is on the wall?", "choices": ["shelving", "statues", "arcane symbols", "posters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301835, "question_id": "js9P4DebcdSvaRrwSG74YG", "question": "Who usually wears the item these people have on their heads?", "choices": ["pageant contestant", "bird watcher", "biker", "cook"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291648, "question_id": "jt2tQAd5V7mH9hxQWNy6Xv", "question": "Why is the small elephant in between the larger elephants?", "choices": ["for protection", "to feed", "to hunt", "to bathe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537078, "question_id": "jupBe2CBWNuy55ec8as6W6", "question": "What is the location that has all of the blue umbrellas?", "choices": ["beach bar", "club", "family reunion", "resort"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195684, "question_id": "jvNktXjW39pbt2tP72wv6Q", "question": "Who is this restroom meant to serve?", "choices": ["men", "girls", "women", "everyone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195684.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412016, "question_id": "jvtLkfgDcfUKATmDqaFnuG", "question": "The toilet here is part of what?", "choices": ["seating", "mall toilet", "rest stop", "display"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412016.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320259, "question_id": "jwZUrcKueYmBxN5ioLBink", "question": "How is the striped item kept on her body?", "choices": ["zipped", "buttoned", "tied", "taped"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320259.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231033, "question_id": "jwtYXstmTjKQZWzejNmijN", "question": "When one sees this sign one must do what to other car?", "choices": ["honk horn", "crash", "flash lights", "let pass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231033.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391716, "question_id": "jxVrnsyg5TmwFG4GLqZuhV", "question": "The blue buckets likely contain what?", "choices": ["candy", "pashmina", "water", "baby llamas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391716.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457853, "question_id": "jxcivq8XJuYLwhLm2vj9tJ", "question": "What is the appliance in the window used for?", "choices": ["heating", "cooling", "cooling", "watching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507681, "question_id": "jxp7NnaYHqft4rmD4gVQjh", "question": "How were the lines in the sandwich formed?", "choices": ["by cutting", "by boiling", "by grilling", "by microwaving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500668, "question_id": "jyDEoF2XeLxR7e4SmR9NWA", "question": "The animals necks are forming what shape?", "choices": ["x", "m", "z", "w"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500668.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64862, "question_id": "jyhA9pHkCQwdMWjuFvvNSr", "question": "How many different poses are present among the surfers?", "choices": ["three", "four", "one", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000064862.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293987, "question_id": "jz2yw2jFGxCmWaNR23hbtH", "question": "Why do cats love warm places?", "choices": ["fur insulation", "hide themselves", "heat toleration", "hide foods"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000293987.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289056, "question_id": "jznd4grd7JEdYtmGAhDwMX", "question": "What is the oldest age that these animals live to?", "choices": ["70", "20", "ten", "30"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205127, "question_id": "k2YfpgnD9JHRUcdJyondN9", "question": "Which celebrity attended the school whose name appears on the green sign?", "choices": ["noomi rapace", "alexander dreymon", "keira knightley", "natalie portman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487062, "question_id": "k3P5yHEcqyDpAECZ33mscG", "question": "What might the kite flier do by accident here causing them to cry?", "choices": ["nothing", "lose string", "get hungry", "see clouds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454049, "question_id": "k4jibQ4TiTMyS8wKQi3nE7", "question": "What is this woman ready to do?", "choices": ["eat", "dance", "drink", "brush"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454049.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444889, "question_id": "k52LHbWVtyWyB9HuZxwSeH", "question": "What round item might be found on the tiles to the left?", "choices": ["basketball", "orange", "loo roll", "tennis ball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444889.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214104, "question_id": "k59tWGXRHDuXkDFX2xvvZd", "question": "What word is usually used to describe the long red item here?", "choices": ["edible", "upside down", "express", "winged"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 203964, "question_id": "k5w8FioKPGfWb6Sz9FNWme", "question": "What does this animal like to eat?", "choices": ["eucalyptus", "ants", "fish", "shoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000203964.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444946, "question_id": "k5x7UwnYR8ic8kxgQf6ABi", "question": "Why is the bus a different color in the front than in the back?", "choices": ["design", "repair", "accident", "camoflage"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000444946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449470, "question_id": "k6e5C3EL9A8R42Fbf99pS6", "question": "Who is famous for playing the instrument that the toilet seat cover is shaped like?", "choices": ["jerry cantrell", "leon theremin", "elton john", "lars ulrich"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449470.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549896, "question_id": "k6pwbpegDfvR6cPnC9jtTu", "question": "What can be said about the weather here?", "choices": ["partly cloudy", "mostly cloudy", "sunny", "overcast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549896.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 361702, "question_id": "k7KbmQ3bC6boCNAoyLVrap", "question": "Higher in the tower what sound producer might be found?", "choices": ["pool table", "food", "dungeon", "bell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000361702.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311794, "question_id": "k8baAKyjqGdYNDyjs5UgC3", "question": "What are the people looking at?", "choices": ["cars", "giraffes", "emus", "mice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334817, "question_id": "k9eziTZwcft9vgAZreRiYT", "question": "What will they use to eat the item in the round container?", "choices": ["straw", "toothpick", "fingers", "spoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334817.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466048, "question_id": "k9kSLE8K2zkHCUmDcDUjbM", "question": "What are these containers used for?", "choices": ["food", "travel", "pencils", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466048.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427116, "question_id": "k9ptWZnAJhFy5SbDdedkrb", "question": "What would be unusual to find on this animal?", "choices": ["tail", "hooves", "wings", "tusks"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000427116.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229389, "question_id": "kANDniARAgz9Tg7v5DaqTE", "question": "What animal were these dogs originally bred to bait?", "choices": ["foxes", "cats", "bulls", "horses"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229389.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207495, "question_id": "kCT7AkPJMRfKDvKXqth2AK", "question": "What does the cat's body posture suggest?", "choices": ["comfort", "happiness", "agitation", "sleepiness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207495.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38535, "question_id": "kCX3pkWqB76yX5cuGr35kf", "question": "Besides storage what's the purpose of the rack?", "choices": ["climbing", "weighing", "balance", "warming"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000038535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 497814, "question_id": "kCtSuqYSzAtvkUCah6kwzc", "question": "What kind of exterior building material is not present here?", "choices": ["brick", "metal", "concrete", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000497814.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298078, "question_id": "kDvunhcdxNUbVDKkFmkWdu", "question": "Why is he carrying a bat?", "choices": ["learning play", "stole it", "is posing", "his job"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130595, "question_id": "kE4JW8BUyTfGhqHw79Rrba", "question": "What snacks are available for persons in the living room area here?", "choices": ["radishes", "carrots", "fondue", "cookies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130595.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270187, "question_id": "kEdRyuBWoraSoQUmjp3tnh", "question": "What form of entertainment is on the screen?", "choices": ["movies", "video games", "visual novel", "tv show"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000270187.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575075, "question_id": "kEhwDiUiovszPPWuvJhjkR", "question": "What is near the top of the roof?", "choices": ["window", "acorn", "squirrel", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575075.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194512, "question_id": "kFDzSyLV7HWitiHnYvAwsS", "question": "What is this dog set up to most resemble?", "choices": ["breakfast", "invisible man", "carrot", "clock"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000194512.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20314, "question_id": "kFfrJB3eKq8ssRzS7zgwX2", "question": "The building material here comes from what?", "choices": ["grass", "wood", "clay", "metal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020314.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 42051, "question_id": "kG2kPqUbsJLz5y5PXsQYvW", "question": "Where is this located?", "choices": ["america", "asia", "europe", "africa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000042051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54565, "question_id": "kG8mWbAF7qzpuBuhzzjKJN", "question": "To visit this location one would need to go to what part of the union?", "choices": ["florida", "alaska", "hawaii", "utah"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 287948, "question_id": "kGSfGky2TjCkuDVPSu8vgA", "question": "The blue orange and green items look like what?", "choices": ["baseballs", "lollipops", "boxes", "cows"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000287948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485646, "question_id": "kGUUiqpZMaRHnNvA23eNiX", "question": "Why is the mouse on the right?", "choices": ["was moved", "for right-handed", "left there", "no reason"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 305164, "question_id": "kJLJXr7hfp4JYwTGnx2DSd", "question": "What is the mode of feeding of these animals?", "choices": ["herbivores", "carnivores", "all", "omnivores"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000305164.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110722, "question_id": "kLoc7PpdtqjaKLu7hWVB3v", "question": "The people are spending the day where?", "choices": ["beach", "basketball court", "football stadium", "zoo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110722.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41224, "question_id": "kMMhGbrkEHEHnd8RbpPceW", "question": "What kind of area is this called?", "choices": ["conference room", "office", "yard", "library"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000041224.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177481, "question_id": "kMZkNkFpHXDzCdcGXUs2Yn", "question": "What is this dog trying to do?", "choices": ["play", "bathe", "rest", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177481.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276, "question_id": "kNetViPVqGL9qfBHJnb37v", "question": "What religion is the item on the very top of the building featured?", "choices": ["hinduism", "christianity", "sikhism", "zoroastrianism"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173931, "question_id": "kP94crCpH5HQbY26qXRiav", "question": "An R is engraved on this horses what?", "choices": ["glasses", "sunglasses", "blinder", "bit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173931.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257648, "question_id": "kPNJPqUiXvA6uhsBLVJrEK", "question": "The dog is doing what?", "choices": ["growling", "yawning", "biting", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 284339, "question_id": "kSP4vpJ2QMUXWXSrxgnYJZ", "question": "The person is holding an item associated with what profession?", "choices": ["firefighter", "dentist", "judge", "baseball player"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000284339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256321, "question_id": "kSrySGK74a4s4JPWJfnoj9", "question": "What are they doing?", "choices": ["arguing", "mating", "grooming", "fighting"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000256321.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 375085, "question_id": "kTmZ9AVjCeicPpjvwuLLGU", "question": "What kind of eyes are these?", "choices": ["beady eyes", "bug eyes", "cat's eyes", "heterochromia eyes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000375085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561388, "question_id": "kUmrCwS42dg44rVxcUjWkt", "question": "What is used to attach the trailer to the truck?", "choices": ["rope", "chain", "nails", "hitch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407733, "question_id": "kUtPqAbw8JwH7NHCgE7rqL", "question": "What is this vehicle used for?", "choices": ["hauling", "carpooling", "racing", "commuting"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000407733.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552528, "question_id": "kVXQUVkVCqe6Ainmkebccc", "question": "In which county is the city to the right located?", "choices": ["yuba", "humboldt", "orange", "mendocino"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552528.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470027, "question_id": "kVnHMT9PNqXijd8S27vYfT", "question": "What does this animal have on the side of it's head?", "choices": ["horns", "antlers", "ears", "gills"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470027.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 284162, "question_id": "kWkmrfYasbQPurBrsCvdim", "question": "What does the man have in his left hand?", "choices": ["baseball bat", "spear", "hockey stick", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000284162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146183, "question_id": "kXwjSJacAhyEWDeW4E2t26", "question": "What is blocking the sun for this woman?", "choices": ["sunglasses", "hand", "paper", "foot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146183.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 143451, "question_id": "kYdSv2W8KiEVJkfPMtS5bP", "question": "The knobs on the wall operate what?", "choices": ["gas", "flush", "water valves", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000143451.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302314, "question_id": "kZ7U4RFN48LjG4EqE2HJKP", "question": "How many giraffes are there?", "choices": ["five", "seven", "two", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302314.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51072, "question_id": "kZpuGk4xxSfpncYDx3EUh5", "question": "What is connected to the man and he is hanging on to?", "choices": ["sail", "fish", "plane", "boat"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000051072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424512, "question_id": "kZxZFp8ZWExFMHhmYerCDc", "question": "How many languages are noticeable on the two signs in the foreground?", "choices": ["three", "two", "one", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424512.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549601, "question_id": "ka6ah4VWuJCEHjihrYMHhq", "question": "What group of people is particularly accommodated in this bus?", "choices": ["elderly", "pregnant", "blind", "handicapped"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107350, "question_id": "kaKh5MHvtS7NN6x9h5uQAd", "question": "What is the cat likely sitting on?", "choices": ["bed", "armchair", "sofa", "recliner"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107350.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394427, "question_id": "kaKqJaYgyYRAZiFmhKCjiZ", "question": "What sort of e mail service does the person who lives here use?", "choices": ["western union", "yahoo", "uber", "gmail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000394427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40832, "question_id": "kafSc4nSvqKny8DyDbvJEq", "question": "The dog is sitting on what?", "choices": ["sand", "snow", "hay", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040832.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373711, "question_id": "kam3TCoXN2L9fnGJw6kBdJ", "question": "What is the most efficient way to travel up this mountain?", "choices": ["car", "walking", "lift", "skiis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402005, "question_id": "kansCovUU3eBEppaUUKMQB", "question": "What type of vehicle might this be?", "choices": ["car", "school bus", "truck", "subway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402005.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460977, "question_id": "kbaNBYXDo9X49eR2MbsNit", "question": "Wheelers travelers are belongs to which country?", "choices": ["italy", "france", "uk", "us"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507932, "question_id": "kdMsBZUeUPA6R56bNtRLgX", "question": "Droplets seen here are what?", "choices": ["urine", "cleanser", "mud", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275134, "question_id": "kdSHefsWCDhioHAet7bNMw", "question": "Are people allowed to enter beyond the post?", "choices": ["probably no", "definitely yes", "no", "yes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275134.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299624, "question_id": "kdqa7tc6wMGiQrHxtdASVf", "question": "What color hair does the boy have?", "choices": ["blonde", "blue", "red", "brown"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299624.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350776, "question_id": "keEoufyZdn685BCC4nm4wq", "question": "What type of information is shown?", "choices": ["speed", "time", "temperature", "direction"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350776.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524251, "question_id": "kedNShcMJeV7fqsgWpfRKw", "question": "About what stage of life are these giraffes?", "choices": ["elders", "adults", "babies", "adolescents"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524251.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376346, "question_id": "kepQ5RvHj9vtQgZ7ebqMid", "question": "What allows this toy to get elevation?", "choices": ["wings", "string", "motor", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173829, "question_id": "kfen8Qui8a7Vwfizc6esGM", "question": "Which hand is in most danger of being crushed if the skateboard tips?", "choices": ["man's left", "child's left", "child's right", "man's right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173829.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415233, "question_id": "kfik8WzDewUY3Vk4ePsC8P", "question": "What type of watch movement is the child's watch?", "choices": ["digital", "quartz", "self-winding", "automatic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415233.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516073, "question_id": "kgNWzmjpr9G8cKvSr3cP4E", "question": "Which primary color is missing from his board?", "choices": ["purple", "green", "red", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516073.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 1024, "question_id": "kgm5TVLFJCtQRgquwbotSZ", "question": "Why does the cat like lying on the laptop?", "choices": ["aggressive behavior", "blocking view", "excitement", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000001024.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27694, "question_id": "kgxLg8NHMyeVfZ8vMZNDu6", "question": "What type of vehicle would people use to travel in this location?", "choices": ["planes", "boats", "bicycles", "trains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000027694.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75854, "question_id": "khGSRnJ4T3Sopo36xxXqQh", "question": "What is the potential danger if the train goes forward?", "choices": ["hitting train", "hitting building", "derailing", "hitting workers"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000075854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309709, "question_id": "kiHG9okGjYZ9iUGsHpbSSk", "question": "What is keeping the large animals from escaping?", "choices": ["vehicles", "predators", "fences", "flooding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513539, "question_id": "kiJCMqd5iNrgdcSDRKEqgx", "question": "What is the most likely topping seen here?", "choices": ["green pepper", "meatball", "gummy worms", "fish sticks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513539.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351341, "question_id": "kiUDpPuBZKNP92JX4Le2dS", "question": "The size of the sink indicates the house is probably what?", "choices": ["extra large", "small", "large", "gigantic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351341.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462942, "question_id": "kjPpkUzBFt7LPysTz5CgmU", "question": "What is the similar sport on the ocean called?", "choices": ["surfing", "parasailing", "snorkeling", "canoeing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462942.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 568136, "question_id": "kjQNFsCkRrxVV5Pev2tGm2", "question": "What is the item in the bowl used for?", "choices": ["melting snow", "eating", "fixing computers", "decoration"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000568136.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7171, "question_id": "kjW47LGCAd8RdhXZ9SrFZk", "question": "What is needed for this activity?", "choices": ["bat", "board", "racquet", "stick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007171.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 530553, "question_id": "kji9QGBiuKDv3QdeUBwPye", "question": "Why is he upset?", "choices": ["cold", "hot", "lost", "hungry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000530553.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100362, "question_id": "kjpivQKuNvztZ88SkmsD9H", "question": "At which life stage is the cat in the picture?", "choices": ["adolescent", "adult", "senior", "kitten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100362.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445047, "question_id": "kkZnL5vzGzEXiUp3yuQdqR", "question": "What is the material attached to the back of the surf board used for?", "choices": ["speed", "aerodynamics", "grip", "weight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000445047.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390063, "question_id": "kma9gRGkRmdGFW3FkbRsJL", "question": "What profession encourages the behavior of the man?", "choices": ["fire fighter", "police officer", "dentist", "circus acrobat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268683, "question_id": "kmzXRxwbUdT4mtkL3C7WzL", "question": "Based on the trees what season is occurring?", "choices": ["summer", "spring", "fall", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000268683.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61804, "question_id": "koPTw8VeneAZkGpwEudjcn", "question": "In what country is this commuter rail system used?", "choices": ["austria", "germany", "denmark", "brazil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061804.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195511, "question_id": "koYNBzWEHbn94ahoAMrVbn", "question": "Where is the man likely headed?", "choices": ["wrestling ring", "zoo", "beach", "business meeting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 305127, "question_id": "koYZA2AoALnxDPWb5NsXmA", "question": "What activity is this person engaging in?", "choices": ["playing parachute", "camping", "hot balloon", "flying kite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000305127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243257, "question_id": "koaogHQFhDS85tFkSeLbHd", "question": "What is this man ready to do?", "choices": ["dribble", "sprint", "swing", "juggle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541066, "question_id": "kpP57Dkc4WBDBUEdnsRQgr", "question": "What is the elephant on the left referred to as at this age?", "choices": ["fish", "bovine", "calf", "doe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265680, "question_id": "kpdHoQmvuP3L3aDNVCM7Yk", "question": "Who does the animal look at here?", "choices": ["dog catcher", "driver", "self", "no one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265680.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133991, "question_id": "kqxQGru8aYdsW5psnsWtHE", "question": "In the plastic container seen here what is held?", "choices": ["milk", "poison", "giraffe food", "bug spray"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517709, "question_id": "kqxSHrhxtGEYQ4AzuE2db3", "question": "Which elephant is more likely to be able to escape through the fence?", "choices": ["both", "photographer", "bottom elephant", "top elephant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44841, "question_id": "kr2xFS4Q8JjAR9jRYDmqRr", "question": "The item on the wall is used for putting out what?", "choices": ["fires", "husband", "garbage", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044841.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434601, "question_id": "kr3awMo3PBrQkPzPndPJRn", "question": "What time of day is this?", "choices": ["evening", "night", "morning", "midnight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362474, "question_id": "krWJyQGVGXq6PWtaxaPDRb", "question": "If lighting struck anywhere near this spot what might be the first candidate?", "choices": ["ground sculpture", "deciduous trees", "man", "clock building"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298476, "question_id": "krdtCtccvk8u6KAPifQqjo", "question": "Why are these five so close together?", "choices": ["crowded beach", "fighting", "sharing wave", "friends"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000298476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378144, "question_id": "krhzrtDcAqZbku8oDTmFPf", "question": "What do the metal teeth between the handles open?", "choices": ["windows", "screws", "bottles", "bolts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343110, "question_id": "ksvfCFDkxdkDFyYyLL6J5g", "question": "Who plays this sport?", "choices": ["cm punk", "brock lesnar", "scottie pippen", "pete alonso"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343110.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373524, "question_id": "ksyAgUybMyPrn7KcKjLD8h", "question": "What is coming through the window?", "choices": ["animals", "light", "furniture", "people"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458731, "question_id": "kuuS4wp7YxyNVaXMp8bBGY", "question": "What is the fixture to the right of the toilet in the bathroom?", "choices": ["sink", "child's sink", "foot sink", "bidet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458731.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85240, "question_id": "kxZaPykm3qrbi5zf4X2NFn", "question": "What are the bears doing?", "choices": ["dancing", "laughing", "mating", "fighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085240.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362009, "question_id": "kxqZ7KcW8HeD79ksQ9ZSTh", "question": "What is the table for?", "choices": ["block intruders", "for sale", "place things", "sit on"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362009.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286960, "question_id": "kyGD4QtsCusCw5szFdNh5h", "question": "Which one of these might they put in the container?", "choices": ["clothes", "flowers", "cables", "garbage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286960.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440053, "question_id": "kygF2wQ5pJuaG48nYTHmPL", "question": "What country is on the van?", "choices": ["france", "romania", "england", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235685, "question_id": "m39LowbmqpYn9xXqfVLUV8", "question": "What breed is the large dog on the front of the boat?", "choices": ["weimaraner", "sheepdog", "beauceron", "azawakh"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235685.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477971, "question_id": "m3GivRsc9RuxVPXMqqyjYu", "question": "What is the bear doing?", "choices": ["jumping", "eating", "resting", "hunting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 181335, "question_id": "m3HgQkqgGVgZAWHG5X3qyB", "question": "What kind is busses are shown?", "choices": ["double decker", "shuttle", "mini", "school"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000181335.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117209, "question_id": "m3hpprDwGDV2reLpWW2AmN", "question": "Who likely paid for the graffiti like art work on this bus?", "choices": ["no one", "transit authority", "taxi company", "usda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000117209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353668, "question_id": "m4nw5soBYE8DUtQ5L7xQnx", "question": "What can be said about the weather here?", "choices": ["sunny", "partly cloudy", "overcast", "mostly cloudy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353668.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172388, "question_id": "m5hgfes8HvUajhU6wbVKCT", "question": "What type of cargo is this train carrying?", "choices": ["passengers", "chemicals", "coal", "grain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396616, "question_id": "m6ocWUZ3NyrYqLNcWa2Xuw", "question": "What is the temperature of the air surrounding the building?", "choices": ["warm", "mild", "hot", "cold"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396616.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82712, "question_id": "m8DU4UkdSMhqTkK4uv2Woe", "question": "What kind of items are debasing the sign?", "choices": ["posters", "litter", "graffiti", "stickers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082712.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171169, "question_id": "m9bDSZhuo7SuARS7vJ32MW", "question": "What is the area the rider has his feet in called?", "choices": ["western", "nest", "tackle", "stirrup"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171169.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374484, "question_id": "m9zGp77kyoHKgTmrFs8mkL", "question": "What type of food is shown?", "choices": ["burrito", "sandwich", "pizza", "taco"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374484.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315886, "question_id": "mAN38bbJP6ZYdFsKcEd94S", "question": "How did these birds likely get here?", "choices": ["hiked", "born here", "via horseback", "flew"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315886.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328262, "question_id": "mBULb9qqzszjAVhGQkcniz", "question": "Looking at what might yield the animals name?", "choices": ["leash", "tail", "collar tag", "marriage records"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000328262.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160024, "question_id": "mBtiz9whMe3HGAGwq6kgiW", "question": "Which stage of cooking is this raw uncooked thing about to enter?", "choices": ["grill", "microwave", "oven", "frier"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160024.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345176, "question_id": "mCBjzfoM9oGQXgZPrBnBJB", "question": "What might you see in the blue thing at the top?", "choices": ["bike", "car", "plane", "skateboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345176.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100420, "question_id": "mCStLKjdhh63E2vAZQLcx6", "question": "This city is located in what part of South America?", "choices": ["western", "northern", "central", "southern"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100420.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494350, "question_id": "mCVjCnCkZPabuMGmBjR7YT", "question": "What is the state of the lid?", "choices": ["up", "down", "broken", "detached"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494350.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464627, "question_id": "mDXkrdWWE3Gx3qdoWxi8dS", "question": "What is the utensil?", "choices": ["spoon", "chopstick", "fork", "knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385200, "question_id": "mE8kaYbEMmv4V8PVX84eXV", "question": "What is another word for the item with the blue handle?", "choices": ["shears", "villa", "motel", "boxes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385200.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32619, "question_id": "mFnYmDph4MVYXMZdc9HLVG", "question": "Who will get water from this?", "choices": ["lifeguards", "teachers", "policemen", "firefighters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032619.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118746, "question_id": "mHKnsEd7psKrpu2iQvYAZ9", "question": "The guest can make use of the folded items during what?", "choices": ["checking out", "shower", "sleep", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118746.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309593, "question_id": "mHpE5dCo6K3y434uxMKZNh", "question": "What color is the sign with the arrow pointing to the right?", "choices": ["green", "orange", "blue", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309593.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152042, "question_id": "mJNfqHQLcgKuseUbzk5s49", "question": "What type of area is this?", "choices": ["downtown", "commercial", "coastal", "residential"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152042.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77051, "question_id": "mJTd6PbL466hfLf4MfPosz", "question": "How many people have used this restroom today?", "choices": ["five", "six", "three", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156745, "question_id": "mJogzVcfQ4igKcZBJuHAp5", "question": "How long ago did they receive this message?", "choices": ["3 years", "11 years", "5 years", "20 years"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129931, "question_id": "mJwrYPdFY43himjwZrSUmP", "question": "What is the man holding?", "choices": ["apple", "cell phone", "pencil", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129931.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577356, "question_id": "mKwqMY3GhDX547FcA5pkXR", "question": "What liquid was originally in the bottle that the flowers are now in?", "choices": ["orange juice", "beer", "milk", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577356.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484691, "question_id": "mLKHfym2yMtzfRGkk7ZmVv", "question": "What part of the skateboard is stalled on the curb?", "choices": ["bolts", "bearings", "grip tape", "truck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000484691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566775, "question_id": "mLeYNvGw3hhnm8rJoQjpsY", "question": "Whose shadow is being shown?", "choices": ["baseball player", "hockey player", "tennis player", "basketball player"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566775.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569894, "question_id": "mLg4eZxeXzps8AfcAccAgx", "question": "What expression does the cat have?", "choices": ["sad", "smiling", "asleep", "angry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517842, "question_id": "mMUCPg4GSrYUoBoCv2SSge", "question": "Where does this animal keep its young?", "choices": ["underground", "cave", "nest", "pouch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517842.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412192, "question_id": "mMrNwYKwAcWyRud7LJy26C", "question": "Who probably owns this yard?", "choices": ["mayor", "cop", "dentist", "contractor"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000412192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 203550, "question_id": "mPoNnFWJ9bt9kNNQhPQsWq", "question": "Who is the current CEO of the brand of this computer?", "choices": ["bill gates", "tom gates", "bill cook", "tim cook"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000203550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505741, "question_id": "mQ9kevyfJw9HH8KVj2q7Do", "question": "What is the elephant standing on?", "choices": ["feet", "hands", "head", "trunk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505741.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 280653, "question_id": "mRciXHrfyzfU967Tq5ZPZJ", "question": "What number is on the red shirt?", "choices": ["four", "two", "three", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000280653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559711, "question_id": "mRmQFZQhwAL7yMGEkuZnA7", "question": "Which company owns the advertised website?", "choices": ["funsmirk", "brightface", "smugmug", "biggrin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204397, "question_id": "mRrRqds9g9QnSj2cx2CN8v", "question": "The woman supports The United Kingdom and what other country?", "choices": ["poland", "indonesia", "dominican republic", "montenegro"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204397.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395760, "question_id": "mSoGde9xXCciQVKFt8WWYP", "question": "Who shares the last name as the name on the shirt?", "choices": ["bruce willis", "chris tucker", "milla jovovich", "jackie chan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527230, "question_id": "mT6bBrtKky6yoqKStbus9C", "question": "How has this food been packaged?", "choices": ["paper", "bagged", "wrapped", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561237, "question_id": "mTCreoXNZRmBqkkXbyReqN", "question": "Who is a sponsor of this snowboarder?", "choices": ["ford", "toyota", "mazda", "jeep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 392968, "question_id": "mTdTo2yU47ajrGwnZpprRV", "question": "What is the bird doing?", "choices": ["falling", "flying", "eating", "spreading wings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000392968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333933, "question_id": "mUnQhD2ePZQtY9qg7PPnp5", "question": "What can be told from this object?", "choices": ["speed", "brand", "date", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29174, "question_id": "mV78gtiqLFwfYiHzVKPfsD", "question": "How many birds are flying in the sky?", "choices": ["eight", "two", "seven", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029174.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39026, "question_id": "mVMkC2zY6U7GcJtCarWVXH", "question": "What activity are the people in the painting doing?", "choices": ["para sailing", "surfing", "kite surfing", "skim boarding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000039026.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52028, "question_id": "mXoHw6EficbaSnLTgycJwz", "question": "What would this animal have eaten today?", "choices": ["meat", "soil", "leaves", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052028.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410188, "question_id": "mYKEJEJaEBCzBrMyv8FGse", "question": "What activity would someone perform with the tools in this room?", "choices": ["exercising", "stretching", "measuring", "reading"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410188.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226358, "question_id": "mYt3miFiUQkHRzSpH2NAXD", "question": "What type of growing method is used for this plant?", "choices": ["hydroponics", "aeroponics", "soil", "aquaponics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553272, "question_id": "mZ9drVa3jwfz9sshHyEjZe", "question": "What time of day is it here?", "choices": ["morning", "night", "evening", "afternoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553272.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503463, "question_id": "mZnRVRjhLv24rYopJrkphF", "question": "What is the first name of a very famous brand of this vehicle?", "choices": ["harvey", "harold", "henry", "harley"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503463.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 469132, "question_id": "ma3tu7J3pBBJVTey7R7qDS", "question": "The work of what type of preservationist is seen here?", "choices": ["florist", "taxidermist", "conservator", "archivist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000469132.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272537, "question_id": "ma7FZ4CJvrui9fvgWQEMky", "question": "What breed is the dog in the photo?", "choices": ["siberian husky", "german shepherd", "rottweiler", "poodle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272537.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271536, "question_id": "maJgRdNbDRg9Eut28SbHrn", "question": "What is the man looking at?", "choices": ["video", "contacts", "photo", "internet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271536.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581264, "question_id": "mbbCxKKJsApTBtMb7TwpdZ", "question": "Holding her head there allows her to do what?", "choices": ["smell item", "hide", "taste item", "breathe better"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581264.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111089, "question_id": "mbhDjrpuPGMWTmUZpqGiMF", "question": "What direction are the birds flying?", "choices": ["east", "south", "west", "north"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111089.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448108, "question_id": "mdCLUVkv8abTuXrhAEBES4", "question": "The person on the skateboard is best described as what?", "choices": ["teen punk", "millennial", "adolescent", "senior citizen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448108.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218276, "question_id": "mdNDRRYtykdg5NMDhycCGH", "question": "What is the name of the specific substance that the giraffe is relaxing next to?", "choices": ["fungus", "grass", "wood", "leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 238283, "question_id": "mdgkpmYBu23w9HDbELy2YJ", "question": "These two trucks are both in what kind of business?", "choices": ["towing", "insurance", "package delivery", "sanitation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000238283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520465, "question_id": "mesCanCfmjAakjSaNydpax", "question": "The shell is most likely made from what vegetable?", "choices": ["corn", "asparagus", "cauliflower", "carrot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520465.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565156, "question_id": "mf6f6Sy28gQNEzzfQTv9Tq", "question": "What might be contained inside the red case?", "choices": ["turtles", "snakes", "clothing", "books"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565156.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544973, "question_id": "mfxQKWXpnouLdeFfh4t5R8", "question": "Besides mother what other relationship would she most likely have?", "choices": ["sister", "babysitter", "pet sitter", "doctor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544973.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557362, "question_id": "mgFkLPTT9x2zAXdaxVYBY2", "question": "What is the cat next to?", "choices": ["statue", "giraffe", "fence", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557362.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363772, "question_id": "mhyWTbyDRQY4cuDCeMyh9B", "question": "In which location ahead are Rocky Mountain Sheep in gravest danger?", "choices": ["farm", "field", "roadway", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363772.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462538, "question_id": "miCJb22JPaw9CyjZ4BrD72", "question": "What material is this structure made of?", "choices": ["bamboo", "metal", "ceramic", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53934, "question_id": "miNbbtsbPpd4ZGNDwWqsTH", "question": "Which one of these can help clean up the area?", "choices": ["rake", "slippers", "pen", "broom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053934.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93616, "question_id": "mjgJLvCydawSUrttEbiYVC", "question": "What language is the white notice written in?", "choices": ["hindi", "hebrew", "arabic", "korean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093616.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372745, "question_id": "mjuXbTqh3X2Hk9rqFG7g3s", "question": "The meat is most likely what?", "choices": ["veal", "pepperoni", "steak", "venison"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79789, "question_id": "mkGywBBn3iQsn4Y2WJhdYj", "question": "What are the people riding in the pictures on the side of the bus?", "choices": ["bicycles", "tractors", "horses", "cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079789.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232669, "question_id": "mkpsmgNph9tE4FzwQqgvuc", "question": "Near which ocean is this building located?", "choices": ["arctic", "indian", "atlantic", "pacific"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232669.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358235, "question_id": "mktTooR76UNMCHuFXr2PhU", "question": "What decade was this picture definitely not taken in?", "choices": ["1970s", "2010s", "2000s", "1990s"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000358235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108632, "question_id": "mmMZFSTtdRXdJurrfZfwfD", "question": "What is this container used for?", "choices": ["food", "papers", "water", "travel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108632.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528542, "question_id": "mmZSp8JkKbj5gJzMJoA2oH", "question": "What are the giraffes trying to do?", "choices": ["sleep", "run", "eat", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342733, "question_id": "mnKwndpMbEU3cuNULqWvXn", "question": "What is the shape flying through the sky?", "choices": ["square", "circle", "flat", "halfmoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342733.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476395, "question_id": "mnY6T4E9chDDCsVAjxXXci", "question": "Why does he have his head covered?", "choices": ["religion", "warmth", "protection", "fashion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000476395.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110853, "question_id": "mnrRbuYUSFGyaH4EVZmEHm", "question": "In which country is the brand company of the green laptop located at?", "choices": ["china", "united states", "britain", "india"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512804, "question_id": "mo7j6QHKzv23uhMpyMJ3eX", "question": "What type of transportation is shown?", "choices": ["air", "water", "road", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512804.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289524, "question_id": "moAqinZRXiZs2ygNAoXEGp", "question": "What order in the meal will this be served?", "choices": ["last", "middle", "first", "second"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424036, "question_id": "mojEgYWPnZ4u8TmBXBUr9g", "question": "What is the air temperature where the boy is seated?", "choices": ["chilly", "cold", "warm", "freezing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 140784, "question_id": "mpjhfA2fGcs98chnNfyzvv", "question": "What does this animal like to chew on?", "choices": ["bones", "cows", "guava", "sharks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000140784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477874, "question_id": "mqcwxSt3grV64DGDhJN6jf", "question": "Who is on the surfboard?", "choices": ["goat", "child", "woman", "man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477874.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508405, "question_id": "msDCbUWHQHRLYNQpkMpeJF", "question": "What does a real version of the stuffed animal make?", "choices": ["honey", "cheese", "silk", "milk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508405.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466470, "question_id": "msH6cgvb6uXfVz4TWgQFEc", "question": "What would be the most likely filling for this cake?", "choices": ["mash potatoes", "milk", "toothpaste", "ice cream"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466470.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59045, "question_id": "mtJ3WkdJL8UX8xPirKw7Up", "question": "Which one of these items would likely be added to this meal?", "choices": ["worcestershire sauce", "hot sauce", "ketchup", "soy sauce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059045.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355148, "question_id": "mtV6Yr9d6mgMk3CzzNnfZj", "question": "What sport is being played?", "choices": ["rugby", "cricket", "european handball", "ultimate frisbee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355148.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294385, "question_id": "mtzcXEp3FXxDk2YN6dvxQi", "question": "What is the stump being used for?", "choices": ["fire wood", "decoration", "cutting board", "fire pit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294385.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291706, "question_id": "muMJojAogn4VZq4Nyr3PLn", "question": "What will the boy do with this ball?", "choices": ["serve", "dunk", "throw", "kick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291706.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551504, "question_id": "mupeprioY79WsKkTXSCAGX", "question": "How many more letters are needed in order to spell Australia?", "choices": ["three", "six", "two", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551504.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7131, "question_id": "mvEoxgQW2memJy2s3QYtau", "question": "Why is the man wearing black pads on his knees?", "choices": ["to clean", "style", "to cook", "protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007131.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142044, "question_id": "mwtNRhr2UyGf3rAcKQk6v7", "question": "How many people could get one serving each?", "choices": ["eight", "four", "six", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129646, "question_id": "mxHf2iRA28UmEYWvJPR2fp", "question": "What type of parking is available?", "choices": ["lot", "diagonal", "valet", "street"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 124649, "question_id": "myykaiG9BryaXbtVNv9w4e", "question": "What is this bear trying to do?", "choices": ["rest", "attack", "eat", "run"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000124649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260112, "question_id": "mzqsSzBkEu2mir7kqDBPLo", "question": "What are the red items on top?", "choices": ["watermelons", "peppers", "tomato", "cherry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260112.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152018, "question_id": "mzyNKuLLaEf9Ai9hkC5rJA", "question": "What does the bear seem to be sitting on?", "choices": ["straw", "stone", "grass", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152018.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363518, "question_id": "n3RqNBjRdGQXyhZsiKd2Bx", "question": "What kind of shirt does the man have on?", "choices": ["colorful", "long sleeved", "tuxedo", "business"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363518.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 23765, "question_id": "n3gB2aHhKLr7FsYsSbmC77", "question": "Which item needs a flame to maximize its scent?", "choices": ["open jar", "light", "bowl", "closed jar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000023765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 323742, "question_id": "n3zNMG4cYsakwivNRmg9vm", "question": "What venue is this person at?", "choices": ["race venue", "zoo", "park", "horse barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000323742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122545, "question_id": "n4jfQvccnsUCfeQEyUs8L7", "question": "What is the same color as this person's pants?", "choices": ["kiwi", "orange", "tomato", "plum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122545.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353668, "question_id": "n549yeQSDgMA9xitM8q7YZ", "question": "What can be said about the skies above the giraffes?", "choices": ["cloudy", "partly cloudy", "sunny", "mostly cloudy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353668.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464137, "question_id": "n5HfhiNyctnoGU49TDQLDV", "question": "Who is the guy looking out the front window?", "choices": ["conductor", "ticket master", "engineer", "switch operator"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464137.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412212, "question_id": "n5sCoA5THTJbA5TXXkRggp", "question": "What type of animal is shown?", "choices": ["monkey", "teddy bear", "gorilla", "alligator"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412212.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3229, "question_id": "n7ayerkXGiYJV5H6keQzEA", "question": "What did the local weatherman likely predict on this day?", "choices": ["tornado", "rain", "sun", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003229.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170242, "question_id": "n7kczaMggghp3jErHGzrkq", "question": "What is laying in front of the dog?", "choices": ["bowl", "bone", "leash", "frisbee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170242.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294464, "question_id": "n8EQWKRLF8uJV4a5m5uqzB", "question": "This site specializes in selling what type of goods?", "choices": ["hand crafted", "factory manufactured", "found objects", "fast fashion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96135, "question_id": "n8jCkQaQKgiMW4wuZcusVd", "question": "What is only one person wearing correctly?", "choices": ["mask", "goggles", "crossbow", "helmet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096135.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249863, "question_id": "n944KXsr5KpDHKm4heoo89", "question": "What object is behind the dog?", "choices": ["tent", "umbrella", "kite", "hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000249863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507639, "question_id": "n9VFHT3Q3YyijCnwh5d95m", "question": "What is the man in the black jacket likely looking at?", "choices": ["ground", "wallet", "hands", "camera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507639.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47874, "question_id": "n9bfy5UU6sKTnwViZXuTh9", "question": "What is most messed up here regarding the toilet?", "choices": ["plunger", "bowl", "seat", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000047874.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169927, "question_id": "n9cDXCLL5885bnJPw27SP4", "question": "What type of sign is shown?", "choices": ["street", "brand", "informational", "promotional"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169927.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123488, "question_id": "nBs7NGZCRToiC2VYTuSXsn", "question": "Which one of these languages can tourists expect to listen to their tour in?", "choices": ["flemish", "creole", "japanese", "navajo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123488.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 166055, "question_id": "nCVLCQKeuo6xZnjTDqccsT", "question": "What is needed for this activity?", "choices": ["sand", "water", "ice", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000166055.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346664, "question_id": "nCcyyPkhBpaUvbc6VqNVTb", "question": "What is a topping on the pizza that needs to be removed?", "choices": ["muscle shell", "crust", "sauce", "shrimp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475383, "question_id": "nEARfCf9o4RG6HEp9LMqX2", "question": "How many cats are near the TV?", "choices": ["one", "two", "four", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000475383.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349257, "question_id": "nEDF2bDAoPXJsNv58Ybqdt", "question": "What class would make the item in the center?", "choices": ["art", "science", "cooking", "math"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510338, "question_id": "nEFAxSDrvdy3X8AKTGbCqJ", "question": "What is this type of sign called?", "choices": ["traffic", "warning", "price", "brand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510338.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501985, "question_id": "nFKcZUGbFeQfU4KkEWj3tM", "question": "What colour is his shirt?", "choices": ["pink", "orange", "blue", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501985.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404317, "question_id": "nFPvxqSXkEvHaBR3A3kJ2B", "question": "What type food is this animal likely to eat?", "choices": ["elephant", "bird seed", "steak", "cotton candy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404317.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161737, "question_id": "nFbptbcN9hmDEZNAf6Qfzz", "question": "The person with the board looks at whom?", "choices": ["boss", "enemy", "photographer", "no one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161737.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29899, "question_id": "nGWBZgYnUZXjuL5iAmjUSM", "question": "Besides the fence what else keeps the dog from going where it wants to go?", "choices": ["shadow", "cat", "leash", "tired"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029899.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545023, "question_id": "nH377pdyx66w9B43XUgeTB", "question": "Where is the man?", "choices": ["amusement park", "bank", "skate park", "supermarket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223329, "question_id": "nJQyPxSqCm6rXorLNYxxEn", "question": "What is this type of window called?", "choices": ["arch", "computer", "picture", "stained glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560209, "question_id": "nJoxB4K3ip376tWGD8FCr5", "question": "What word is missing on the message shown on the back of the truck?", "choices": ["are", "see", "is", "be"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6769, "question_id": "nK9D28KQzHYtjHY6qgqmnR", "question": "What are the girls blue bottoms made from?", "choices": ["denim", "plastic", "leather", "aluminum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314099, "question_id": "nMjuCDhcmmeKAvvNsgTKmX", "question": "What flower would most be appropriate for these vases?", "choices": ["small roses", "dinnerplate dahlias", "sunflower", "oversize plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314099.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464627, "question_id": "nNJAcfGdwDmrVyw7uqAywt", "question": "Which country is known for using the utensils being used to consume this meal?", "choices": ["italy", "india", "ethiopia", "japan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3630, "question_id": "nNaVxPPGh6gJ7L8riokP7P", "question": "What object is the bus being used to transport?", "choices": ["mail", "blankets", "surfboards", "packages"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003630.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170679, "question_id": "nPXoEkMcBbbPMUB8uSAvMJ", "question": "Where was this photo taken?", "choices": ["outhouse", "dining room", "bathroom stall", "home office"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170679.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77452, "question_id": "nQGEbskow7JKAasAvQcEPY", "question": "What alcohol brand refers to the pattern on this animal?", "choices": ["good ale", "checkered cool", "red stripe", "monochrome malt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415846, "question_id": "nQqDfgMn9zXexcK2qNdM9p", "question": "What video game system is the person playing on?", "choices": ["playstation 4", "atari 2600", "nintendo wii", "xbox 360"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415846.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 455080, "question_id": "nRFPqrKWxyiHHMEAMyJDo3", "question": "What does the item under the camera look like?", "choices": ["bee", "dog", "cat", "frog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000455080.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 1172, "question_id": "nRWN43Pvmj7kqj8oMQDSzF", "question": "What food does this animal like to eat?", "choices": ["kiwi", "bananas", "honey", "wax"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000001172.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68371, "question_id": "nSAwy5QghtfSYx5rdEdoJU", "question": "The street sign belongs to which city as indicated by the skyline of the buildings in its logo?", "choices": ["seattle", "san francisco", "new york", "vancouver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443582, "question_id": "nSWXj8q7JPPJPtS6doyXEw", "question": "What adjective can be used to describe the shower curtain?", "choices": ["run-down", "old", "flowery", "tattered"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4866, "question_id": "nT3T7gRJ4WgjoFr9K4UqWd", "question": "What is the profession of this person?", "choices": ["athlete", "electrician", "painter", "plumber"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004866.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 300339, "question_id": "nTBraBUUon7VUm53mDrrFF", "question": "What fruit matches the color of the wheels?", "choices": ["lemon", "apricot", "pear", "strawberry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000300339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534599, "question_id": "nTFu2dKZbPDJLcczZdZzC3", "question": "What is the color of the road sign?", "choices": ["yellow", "white", "black", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534599.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 388077, "question_id": "nUFcaKVaDt7MGMc7XpynXa", "question": "The dog is cuddling up to what?", "choices": ["baby", "snake", "stuffed animal", "woman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000388077.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373711, "question_id": "nURDBbyq6CambyFJCEZRDZ", "question": "Where is this woman?", "choices": ["carnival", "rodeo", "barn", "ski lift"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270349, "question_id": "nUz5JSFHSYCK9pbGTkrjwL", "question": "Where are the man and the horse?", "choices": ["ranch", "forest", "zoo", "beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000270349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105794, "question_id": "nV9dq7XtYpMSs7cFno7hYb", "question": "What brand is the mouse sitting on the laptop?", "choices": ["logitech", "microsoft", "apple", "samsung"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537609, "question_id": "nVKmXdSrXwmCSFxmbTnuND", "question": "What kind of kittens are these?", "choices": ["persian", "grey tabby", "calico", "siamese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537609.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514150, "question_id": "nVYdSmqTWxLBLphzMDS3jb", "question": "What number is at the top front of the bus?", "choices": ["205", "366", "380", "198"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329986, "question_id": "nVbr49NCrkV9o7dAmra4S9", "question": "What would be coldest to the touch here?", "choices": ["hands", "snow", "jacket", "hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329986.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321187, "question_id": "nW8kzxAKhneqcsUNJaSmx7", "question": "What is the man in danger of hitting?", "choices": ["boat", "board", "water", "cords"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000321187.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222202, "question_id": "nXFCmMs5KEZJRbgKEtLSgB", "question": "What number comes numerically after the number on the hydrant?", "choices": ["47", "37", "40", "65"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222202.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144636, "question_id": "nY6agwAP6PXkKy7MktVMAf", "question": "Where on the road can you find the following signs?", "choices": ["hill", "junction", "house", "roundabout"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000144636.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21311, "question_id": "nYvmMoec4bRVVQK5GJ2JRA", "question": "What kind of energy is used to power this train?", "choices": ["coal", "diesel", "electricity", "steam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021311.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435511, "question_id": "nZ2iHA8Zfe6yyXa5tyALQp", "question": "This photo appears to have been taken through what?", "choices": ["bagel", "peep hole", "hula hoop", "mug"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126386, "question_id": "nZaUB83M24rK5WSxhoyqCR", "question": "To create a design or imitate a art is known as?", "choices": ["painting", "art", "writing", "drawing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000126386.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501098, "question_id": "nab9JMraFwTPJ52SvcfGUv", "question": "Which person would have the most head protection from the sun or rain?", "choices": ["balding man", "left woman", "cap man", "rightmost man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501098.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498893, "question_id": "nb9inYbT3DreKSLumKXdcA", "question": "Which double letter appears on the front of the train?", "choices": ["ee", "bb", "dd", "cc"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498893.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379752, "question_id": "nbSLiNibMoQbrsfFDrf7nP", "question": "Which number has broken off from this keyboard?", "choices": ["seven", "nine", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379752.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556797, "question_id": "nd8jHcE8dcrQdgJapy5Rx2", "question": "In which state is this street corner located?", "choices": ["arizona", "florida", "california", "texas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556797.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275132, "question_id": "ndqU4q8qjGwCLg5sDjqjU6", "question": "What type of room is shown?", "choices": ["escape", "conference", "hotel", "waiting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275132.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515557, "question_id": "ndrFXVCW3ZB9Sjtb6tRgxB", "question": "How is the larger giraffe probably related to the smaller giraffes?", "choices": ["cousin", "parent", "sibling", "grand parent"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581342, "question_id": "ne2fLSnFJQnRnw2v37X8CQ", "question": "What are the headphones on top of?", "choices": ["box", "cat ears", "human head", "mug"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581342.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465950, "question_id": "nebBa3uQJHfPqVhYHvTmgq", "question": "What is next to the castle looking structure?", "choices": ["windmill", "tiger", "bird", "bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465950.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467503, "question_id": "nf6BwTTLDHfyCCQstNmshr", "question": "Which type flag seems out of place inside this shop?", "choices": ["mexican", "spanish", "white", "american"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467503.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318342, "question_id": "nfEfLwmDzykyXo264QQihR", "question": "The elephant is at least how many times heavier than the human man?", "choices": ["500", "50", "1000", "100000"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000318342.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173926, "question_id": "nfTn2smumge422vyMnQWZu", "question": "What is the purpose of the object next to the toilet?", "choices": ["reading", "lighting", "pleasure", "cleaning"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000173926.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82009, "question_id": "nfxjoEQjERJfSDCMqhEDJv", "question": "What type of phone is he using?", "choices": ["landline", "rotary", "cellular", "corded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082009.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548553, "question_id": "ngTjhigKA7KdXjKbEVyrm4", "question": "What is different temperature-wise about the fuel this vehicle needs compared to ordinary gasoline fuel?", "choices": ["higher ignite", "freezing", "no difference", "lower ignite"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000548553.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50390, "question_id": "ngo9kBwkdbkF2mGCMSVybd", "question": "Which country is this airline based in?", "choices": ["germany", "united kingdom", "united states", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406902, "question_id": "nhRkj9BHkkzXRktzhKQuNa", "question": "What does this charity want to prevent?", "choices": ["war", "poverty", "disease", "violence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000406902.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260691, "question_id": "nhZAujLADzLFJaRPB7FJ2J", "question": "What will be the largest item in this room?", "choices": ["fridge", "car", "couch", "stove"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112607, "question_id": "ni5os5uUSt585b5cGBZ7JT", "question": "What is in the playlists?", "choices": ["movies", "music", "games", "documents"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112607.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527582, "question_id": "njnvb8sTqnBf9TXcBdfiLL", "question": "What are the green things used to make?", "choices": ["paper", "puree", "cucumber salad", "pickles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194861, "question_id": "nkGaCaZEDrz5UaquUo5mmq", "question": "What type of water is being surfed?", "choices": ["bottled", "salt", "tap", "pool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000194861.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445526, "question_id": "nnEdP4NxdmhtVV2U6JnhW7", "question": "What kind of transportation is shown?", "choices": ["road", "rail", "air", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000445526.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178698, "question_id": "nnJrpCPyQNpQwzyxeHZEAP", "question": "How many meals could be prepared from the contents of the open refrigerator here?", "choices": ["zero", "one", "five", "65"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178698.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444707, "question_id": "nnVFTpFPqkJetk2jnG9fbh", "question": "What might cause this bird to starve here?", "choices": ["rain", "drought", "wind", "warm temperature"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176103, "question_id": "noFwfggvCi8NZQdCKMtCZK", "question": "Where might this car likely be parked?", "choices": ["dairy", "factory parking", "airport", "golf range"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176103.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97313, "question_id": "noQVhEKxBBwjD9WcWpv676", "question": "Where are these zebras?", "choices": ["jungle", "desert", "savanna", "farm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097313.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372505, "question_id": "novcdvFfUXtgRkyttcDcMT", "question": "Why is the bear holding a game console?", "choices": ["cameraman posed", "playing game", "is toy", "stole it"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000372505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196950, "question_id": "np2P2m6wWpmY4BxmzTj4eB", "question": "Where is the headquarters of metro bus?", "choices": ["alberta", "washington", "montana", "oregon"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000196950.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 219131, "question_id": "npMgaWoEr8PviRVhT89sC7", "question": "What kind of animals are shown?", "choices": ["domestic", "reptiles", "aquatic", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000219131.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522974, "question_id": "npvKzsSQxk9awDG5MGhQyq", "question": "What is the possible price in dollars for the meal above?", "choices": ["ten", "twenty", "thirty", "fifty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522974.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403722, "question_id": "nr2rpq6p4qBxxD5xMmqp3T", "question": "How are the two people seated here related?", "choices": ["strangers", "aunts", "married", "inlaws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403722.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79078, "question_id": "nrweg5xmWqssJLeBswDNS2", "question": "His gesture signifies what?", "choices": ["negativity", "hitchhike", "sickness", "positivity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 455919, "question_id": "nsyP2kgdYwbuCsTFFtBi5y", "question": "Who seems to have made the red card on the right?", "choices": ["grandma", "child", "adult", "doctor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000455919.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 31800, "question_id": "nt2dCXUTbypeKZEweV6Srj", "question": "What type of transportation is shown?", "choices": ["road", "rail", "air", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000031800.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35644, "question_id": "ntTzbrCjaKnksyoxHeuBbV", "question": "How many animal species are probably in this area?", "choices": ["one", "three", "two", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035644.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230285, "question_id": "nuNT3XhsC44VemehUYtNTv", "question": "In which geographic area of the United States is this train traveling in?", "choices": ["west", "midwest", "south", "northeast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376425, "question_id": "nvkgcVxFNgm5bDZss9G6DA", "question": "What is the zebra doing?", "choices": ["resting", "drinking", "foraging", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514286, "question_id": "nw37ctQoKF3d7fvYB6N9cJ", "question": "What kind of architecture is in this picture?", "choices": ["american", "chinese", "european", "african"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30782, "question_id": "nwc27omqbq5AheUifSPzTR", "question": "What is the cat doing?", "choices": ["swimming", "eating", "hunting", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030782.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202, "question_id": "nzYfUTTmYzhr7bWZyJbzsG", "question": "This building is considered the territory of what nation?", "choices": ["united states", "turkey", "britain", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000202.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6499, "question_id": "o34xWScxwehvJKz3zTjARh", "question": "What item is the same color as the dominant color of the bus?", "choices": ["cherry", "lime", "banana", "carrot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006499.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298858, "question_id": "o35mqJ6HuQeejSeLajoULm", "question": "What has been done to modify the red street sign?", "choices": ["color change", "mercantilism", "shape change", "vandalism"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298858.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107055, "question_id": "o3LvHRFWHQmznBXeYhHHS2", "question": "Why are none of the lights lit?", "choices": ["not needed", "abandoned", "no power", "is broken"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107055.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120950, "question_id": "o3iy7jievyhEB2uzG95UYf", "question": "What kind of wood is in the fireplace?", "choices": ["birch", "maple", "pine", "oak"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120950.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364259, "question_id": "o4ke9oC7KbzRpkFPtQvZtx", "question": "What type of tower is shown?", "choices": ["cell", "lattice", "water", "clock"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000364259.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377289, "question_id": "o525ES5bDF5HRJfz4sGDmi", "question": "What type of sandwich is this?", "choices": ["tuna", "club", "bologna", "blt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542648, "question_id": "o6JdxoA6yiTUcA2QrKJPHs", "question": "What was the first location to the left originally used for?", "choices": ["hospital", "stadium", "school", "stock exchange"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139287, "question_id": "o6Whw98kUdYMQMq5TJRuPA", "question": "Which Island state does the person holding the frisbee try to represent most here?", "choices": ["hawaii", "maine", "mexico", "california"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139287.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549811, "question_id": "o9RKrTyoBg7MLc7o9Zmqpk", "question": "What type of clothing is the person wearing?", "choices": ["bathing suit", "dress pants", "wet suit", "short shorts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549811.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451209, "question_id": "o9iQHzTJAjjAWmCfwtGAPe", "question": "What health problem does this person face?", "choices": ["obesity", "cancer", "back pain", "hypothyroid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513539, "question_id": "oA34hyQZVengVK5eYKErr2", "question": "What brand is the water?", "choices": ["aquafina", "dasani", "fiji", "nature springs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513539.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490082, "question_id": "oA7tYBCKocVPVZukGUBtA4", "question": "What could the baby giraffe be looking at?", "choices": ["ground", "sky", "food", "ostrich"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490082.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77309, "question_id": "oC73pvL4MmRjmoZ2hWP2wy", "question": "Which one of these sells this dessert?", "choices": ["dunkin donuts", "panda express", "cheesecake factory", "jamba juice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077309.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158967, "question_id": "oC8fS3ztDUbmURUYShUCHM", "question": "Why are her nails colored red?", "choices": ["match phone", "was attacked", "accident", "decorative"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000158967.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215267, "question_id": "oCn4uqmdfkHqc5p8kWdP93", "question": "What is the stick the rider is holding in her hand?", "choices": ["drumstick", "tree branch", "riding crop", "fly swatter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000215267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506317, "question_id": "oCtGMVRZUB9ziRSRUQjx7o", "question": "Which location would be exposed to the most sunlight?", "choices": ["ven interior", "front bench", "rear bench", "gazebo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506317.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404878, "question_id": "oDxrw29XmhujnkzBk8oLmd", "question": "What is a famous cartoon of this animal?", "choices": ["yogi", "pluto", "roadrunner", "donald"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404878.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515443, "question_id": "oEGMBnST3sujqeweAWPirf", "question": "Where are the giraffes most likely hanging out in the sun?", "choices": ["reserve", "zoo", "farm", "backyard"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000515443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 313740, "question_id": "oEkP9Z4x4QZozQRRWZT2zs", "question": "Why is the pile of stuff next to the zebra?", "choices": ["trash", "food", "hiding zebra", "nest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000313740.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258536, "question_id": "oFcVpAztKHJkW3JKBmLes9", "question": "Closing the lid reduces the spread of what?", "choices": ["liquid", "droplets", "sputum", "germs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000258536.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501360, "question_id": "oFrYdHk88EVUBPRk8KpsS3", "question": "What action is the girl performing?", "choices": ["laughing", "cpr", "throwing", "embracing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501360.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223854, "question_id": "oGV8YrPwsQVEhFo7FJ8RBA", "question": "What does this animal have?", "choices": ["trunk", "wings", "horns", "pouch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 332565, "question_id": "oGXeTmqqpEBsAtjPkMdBP5", "question": "What is likely the reason for their trunks curling?", "choices": ["breathing", "trumpeting", "eating", "drinking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000332565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201812, "question_id": "oH9kNDdi6hau4FYbGq38yj", "question": "The truck that transports the workers that use the yellow item is usually what color?", "choices": ["green", "blue", "red", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201812.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372796, "question_id": "oHfW32xXzKhRvPpbcQ9D5M", "question": "Why is this man wearing a wetsuit?", "choices": ["coolness", "keeping dry", "sex appeal", "uniform requirements"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372796.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184008, "question_id": "oK9Xa4WmjDD7vUfn24MmWj", "question": "What is the longest word on the sign?", "choices": ["cow", "market", "cannonball", "prestige"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9580, "question_id": "oL3hUHikeWEZRaDTpNfkst", "question": "The animal on the left has a visible what?", "choices": ["tail", "trunk", "horn", "tongue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17980, "question_id": "oLKXWh7HXRfFwqr64xSmv6", "question": "She is using the scissors to simulate what?", "choices": ["earrings", "glasses", "hat", "shoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017980.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133824, "question_id": "oN8RY7mpNvQa438AzrzF7W", "question": "What company's office building is present?", "choices": ["panasonic", "sony", "nintendo", "hitachi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133824.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168302, "question_id": "oNYduVQQiaMutp2MjwMJ5J", "question": "This sign hopes to eliminate which danger to local children?", "choices": ["photographing children", "toy cars", "children escaping", "speeding cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168302.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115308, "question_id": "oQ86eGZcueznDsCz6vaBUV", "question": "Which one of these people would have difficulty eating this food?", "choices": ["lactose intolerant", "hemophiliac", "diabetic", "hypochondriac"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432080, "question_id": "oQUgHfU9QwcEtwnxwx4GkF", "question": "This is made up of what?", "choices": ["fiber", "plastic", "wood", "clay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432080.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 523476, "question_id": "oRZCoQreGBgmLWNzzkYwET", "question": "Based on color what would be the most commonsense name for this group of professional pilots?", "choices": ["white angels", "orange angels", "purple angels", "blue angels"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000523476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491491, "question_id": "oSBWJN8baUx5RpKtPNGqhF", "question": "What can be said about the skies above the giraffes?", "choices": ["mostly cloudy", "mostly sunny", "overcast", "partly cloudy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000491491.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191063, "question_id": "oSgTrT8Pp6VBtkJ9iGSXh6", "question": "What kind of water is being depicted?", "choices": ["salt water", "ice", "fresh water", "brackish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11986, "question_id": "oSjerB4CqDT5qkpGxhb4py", "question": "What mode of transport is pictured above?", "choices": ["air", "sea", "road", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000011986.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275134, "question_id": "oUNK24fMmQDrFKFNX24AU7", "question": "The sign is written in what language?", "choices": ["english", "german", "french", "cantonese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275134.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267562, "question_id": "oVtVwQLbC3N4zcpnVbicmZ", "question": "At which location is the area where this person plays frisbee?", "choices": ["cow lot", "city", "park", "farm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267562.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343362, "question_id": "234puJuixoLj3fhZv4poT3", "question": "What is the giraffe standing in front of?", "choices": ["mountain", "forest", "building", "man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343362.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381325, "question_id": "23Mvgy9kmBaVxLafpUm6MV", "question": "What is required for this activity?", "choices": ["snow", "ice", "wind", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381325.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306977, "question_id": "24ztCC9KV2oCdp3VR3xw55", "question": "What is done behind the curtain?", "choices": ["brushing teeth", "sleeping", "showering", "doctor visits"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482491, "question_id": "25TQ4KpUx2gHCarvh7tKKC", "question": "Unleavened dough of wheat flour is used to make?", "choices": ["cheese", "pasta", "macaroni", "ice cream"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482491.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119380, "question_id": "28HJY38YBHgfuDBHAiNCjn", "question": "The kitchen is heated in the winter by what method?", "choices": ["furnace", "fireplace", "radiator", "electric heater"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119380.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155863, "question_id": "295qEwFwHJcdPNduUKz2mz", "question": "What is a group of these animals called?", "choices": ["flock", "herd", "murder", "celebration"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 70672, "question_id": "29NefuAiMFTaw7Mwt4WcnF", "question": "When using the toilet here where would a lady sit?", "choices": ["in hole", "near ceiling", "nowhere", "on ground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000070672.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310482, "question_id": "29kpacgmQbehkyUCzzq8yx", "question": "Between what can a gap which the tennis ball might be lost be seen?", "choices": ["posts", "legs", "racquets", "balls"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109757, "question_id": "29ymHT3GNJaFfUY7pq3x7D", "question": "What is the time displayed on the city clock above?", "choices": ["1110 pm", "1255 pm", "1110 am", "1255 am"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291159, "question_id": "29z8ZzUc2dsrxHXn9ZPvpX", "question": "Which of the following would be concerned to see this picture?", "choices": ["fisherman", "astronaut", "teacher", "environmentalist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291159.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283422, "question_id": "2ANT7dEAYFeV35EC8MHz8g", "question": "What food is the same color as the color of the man's tie?", "choices": ["banana", "lemon", "carrot", "lime"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283422.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438656, "question_id": "2BnmM3zLMLwm2scyde2oHC", "question": "What is this toilet display?", "choices": ["artistic expression", "being used", "foreign bathroom", "for sell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453436, "question_id": "2Fjcqz7t9VXdZTVt8VjHHQ", "question": "Why are these two together?", "choices": ["sleeping", "cameraman posed", "fighting", "mating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453436.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44222, "question_id": "2GYyKuVZCkZZb4Ks3Hav2g", "question": "If two people who didn't know each other came to this park area and sat down upon how many separate benches would they most likely sit?", "choices": ["one", "two", "three", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044222.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452290, "question_id": "2H2Z4ysVL8T5uvQ2J43Pcu", "question": "The ladies here are participating in an early form of what?", "choices": ["crosscountry skiing", "skeet shooting", "bowling", "racing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452290.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 113027, "question_id": "2Kakvqn2WyGkH7Eg4ra2z4", "question": "Which one of these is most likely the setting?", "choices": ["prison", "beach", "park", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000113027.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279851, "question_id": "2KpubZkeoYKhqRpJuZ4GkP", "question": "What is the man with the back pack on most likely using his skateboard for?", "choices": ["transportation", "looks", "competition", "tricks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000279851.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549847, "question_id": "2Lyp5QNxPa2Fmfu2DMGvHu", "question": "What seems to be missing from this food?", "choices": ["cheese", "kiwi", "oranges", "cucumbers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549847.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197075, "question_id": "2NmpbpWSz2ZgwySe2uTeEe", "question": "In which way is the smaller mammal shown here related to the larger?", "choices": ["mate", "parent", "offspring", "spouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197075.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 568946, "question_id": "2NybNwRoPTBHHfNkvgKyZF", "question": "What fruit is in the tart on the left?", "choices": ["raspberry", "rhubarb", "orange", "banana"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000568946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350551, "question_id": "2SUmFb7476nSkWF3eKSZah", "question": "What is inside the water bath?", "choices": ["empty", "person", "water", "soap"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498429, "question_id": "2V7MvrrokMsJU6WNHeNtvt", "question": "What item on this work is sharpest?", "choices": ["trunk", "ears", "tusk", "tongue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105489, "question_id": "2WxGmeMWVuNFzUs74FbCXk", "question": "Where are the pizzas?", "choices": ["porch", "living room", "dining room", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105489.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177307, "question_id": "2Z3rp95Y4eGypmXLcQyLQY", "question": "What number is closest to the number on the bike?", "choices": ["152", "75", "998", "168"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177307.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 405424, "question_id": "2bAHAiZzkraLzA6d5euR2M", "question": "What does this item store?", "choices": ["cash", "books", "food", "jewelry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000405424.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329218, "question_id": "2ddQS9wCf89Ai4cJdkNjJd", "question": "Why is this man hanging sideways?", "choices": ["hijinks", "tradition", "fun", "strong wind"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000329218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223542, "question_id": "2fWuhQRZ3Pg3s7gKQwQXUR", "question": "What is unique between the kettle on the left and the shoes on the right?", "choices": ["design", "shape", "color", "usage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223542.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130408, "question_id": "2jnhrf6iRubLtSdSLEVN3Y", "question": "What can be said about the giraffe's neck?", "choices": ["deformed", "shaved", "long", "short"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130408.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119878, "question_id": "2k2gyQGDRiM3B3vmSY6eDV", "question": "What is the food mostly made up of?", "choices": ["fish", "lettuce", "cheese", "beans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119878.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374039, "question_id": "2k86Dt2NdyaNVNhScYawhF", "question": "In which continent is this building located?", "choices": ["asia", "north america", "europe", "australia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374039.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253114, "question_id": "2mGW93d2fD8xDk9jZZAPWH", "question": "Where are the bears on the picture above located?", "choices": ["ice", "ground", "one tree", "under tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000253114.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 94125, "question_id": "2psvr7VqezkRx6SC7mJJyr", "question": "What month was this photo most likely taken in assuming it's taken in the northern hemisphere?", "choices": ["june", "tuesday", "december", "july"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000094125.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326355, "question_id": "2qXX2q9zCeHDApvB2Z8B72", "question": "The man most likely works where based on his attire?", "choices": ["docks", "law office", "fire house", "circus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326355.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324371, "question_id": "2tFE7jM94B47rK4RMtrYYx", "question": "Why is the darker horse smaller?", "choices": ["for riding", "malnourished", "camera angle", "is younger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000324371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156742, "question_id": "2tm6nMN2ZYAi7yfrC9kvpC", "question": "The statue looks like what creature?", "choices": ["gorgon", "yeti", "gargoyle", "sasquatch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342473, "question_id": "2uMcgQwQA34PwR7MRoLuK8", "question": "What sound does the animal that the cake is shaped like make?", "choices": ["howl", "baa", "woof", "meow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342473.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453564, "question_id": "2vhRz6esucvG2TE2SfmZib", "question": "What requires an electrical charge here?", "choices": ["tooth brushing", "showering", "hair brushing", "flushing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453564.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80607, "question_id": "2w9nVLpFXzuPtsCLvTcyLS", "question": "What is falling down on the left?", "choices": ["snow", "water", "sand", "leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080607.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200731, "question_id": "2xkLsAkN9PcYvYgTmdw2uR", "question": "Why is the stuffed bear here?", "choices": ["posed", "wandered off", "for sale", "discarded there"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000200731.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 148779, "question_id": "2xsuzjaHZ58w53FXJsVZ5h", "question": "What is the name of the street?", "choices": ["commercial", "avenue", "7th st", "commercial ave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000148779.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399961, "question_id": "2yGxCpEB9ev3pyhLBUFDLU", "question": "Which of this elephant's features have been cut?", "choices": ["tusks", "hair", "nose", "mouth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399961.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385495, "question_id": "2yyTUsGdVoSNTBBp7PfLCA", "question": "The container with a pink top contains something meant to wash what?", "choices": ["back", "knees", "feet", "hands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385495.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439586, "question_id": "2zPnmc2Yi9hs76xmJDhmSv", "question": "What color fur is only on the cat?", "choices": ["brown", "black", "white", "rust"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439586.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395119, "question_id": "2zfhoijvF5xspwJ8NQa73w", "question": "What is this room used for?", "choices": ["exercise", "bathing", "working", "cooking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395119.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433257, "question_id": "32KLgEwsE6Kr7aFEHkyJtE", "question": "Which one of these types of music would this group be interested in?", "choices": ["edm", "classical", "jazz", "folk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160084, "question_id": "32L3VQRdUgyqvDTPxSh7f6", "question": "What material does the couch being jumped on feel like?", "choices": ["silk", "sand", "toilet paper", "velvet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155738, "question_id": "33fTzDNMqJTUofWeR2nQ5T", "question": "Where are the flowers placed?", "choices": ["nowhere", "in vessel", "above table", "ground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155738.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374682, "question_id": "34yQfcWL9WFpyoy69GgWfL", "question": "What are the floating things in the bathtub?", "choices": ["soaps", "rose petals", "bugs", "toys"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223802, "question_id": "35XkG42Ms8KjU32vsMw9KJ", "question": "What keeps the toy from being flat?", "choices": ["stuffing", "water", "sand", "marbles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167835, "question_id": "368ConRNEizc3qJ4weSfba", "question": "What is the pizza pie being served on?", "choices": ["wood pan", "metal pan", "glass pan", "plastic pan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000167835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 455517, "question_id": "36bTvQkeW7bMA9yPCZNave", "question": "From which country/kingdom are the symbols used to indicate time shown here originated?", "choices": ["syria", "united states", "rome", "arabia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000455517.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173192, "question_id": "36d8zZkGK8Soq9oNqTPu2r", "question": "How many bears do you see in the above picture?", "choices": ["six", "one", "three", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529334, "question_id": "37XJhkT7bX4jmaGTXwZ5J5", "question": "What country is this in?", "choices": ["mexico", "ireland", "united states", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529334.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205001, "question_id": "39zbUmfWSaKiDS4uw8TGYD", "question": "What event is held here weekly?", "choices": ["sermon", "singalong", "exam", "concert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205001.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109757, "question_id": "3BKBihd3SkbJrCmfMRND3e", "question": "In which country is the gold and black clock located?", "choices": ["united kingdom", "australia", "canada", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567218, "question_id": "3C2ETkG3CUmm6LGwNLhUWx", "question": "Where are the planes?", "choices": ["factory", "airport", "warehouse", "military base"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137340, "question_id": "3C6FL6kdV83gLgP2Sh5kZw", "question": "What does the cigarette box say that smoking will do?", "choices": ["kill", "satisfy", "beautify", "calm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562976, "question_id": "3CL7sbefcfftaqqgkM6Q3G", "question": "What is the swirl made of?", "choices": ["fudge", "toothpaste", "caramel", "sugar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562976.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79375, "question_id": "3Cja8qn96k9ggrUv7G9qdW", "question": "Which of the 5 senses are these creatures particularly astute in possessing?", "choices": ["tasting", "feeling", "smell", "sight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 513955, "question_id": "3D7h7aDVcz9KAZfdF7Nqoy", "question": "What is the main reason why they can't use this device effectively?", "choices": ["color", "sleepy", "age", "brain capability"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000513955.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158389, "question_id": "3DhPCkxGmXrzKeab8DUcry", "question": "Where is this appliance located?", "choices": ["office", "restauran", "home", "store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158389.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260666, "question_id": "3EKSpYEUeVp8ziRC3Yzqyw", "question": "What does the white tower help navigate?", "choices": ["boats", "planes", "birds", "horses"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000260666.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135827, "question_id": "3ER3MuxMnbcskkVFGNiHmz", "question": "What is the red thing inside of?", "choices": ["house", "mouth", "box", "ground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135827.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279293, "question_id": "3Edih9rUn68EZfkDvFccd8", "question": "What kind of human construction are the zebras near?", "choices": ["building", "road", "jail", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000279293.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137610, "question_id": "3Gwp4VLLpxyeimKaj4vWBw", "question": "What object would be grabbed first if someone was to step out of a shower?", "choices": ["soap", "faucet", "towels", "floor mat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137610.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298971, "question_id": "3Kyu5ctJf7ZjyWLPUAmUeA", "question": "What is the person who is most likely to be injured wearing?", "choices": ["camo pants", "green jacket", "purple cap", "white/gray jacket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54767, "question_id": "3LEAe2XiAWnQ9ZP9LmVP2t", "question": "What is the giraffe doing?", "choices": ["drinking", "walking", "sleeping", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50595, "question_id": "3LM8B7RukNMK3Km2kRg8dK", "question": "What does the gray container hold?", "choices": ["nothing", "keys", "shoes", "giraffe food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050595.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121171, "question_id": "3LtDssexvuSSM62VFXiSru", "question": "Both hands on the clock are nearest to what number?", "choices": ["nine", "three", "seven", "11"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121171.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4954, "question_id": "3M66YyfHqkpKoRDzFHVyJy", "question": "Which sport celebrates the bond between handler and dog by allowing them to work together?", "choices": ["skiing", "disc dog", "none", "kiting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004954.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441975, "question_id": "3M7BgZqV3LMsdbuaYLQKKD", "question": "How many people live here?", "choices": ["one", "four", "two", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441975.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107398, "question_id": "3MyFwR2EEc2rPW4kG9ZjVe", "question": "What is the man balancing on that is normally used to fix wrinkled clothes?", "choices": ["ironing board", "bed", "pillow", "bed sheets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107398.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230060, "question_id": "3NDn66snCCY4p2XaYu2ME3", "question": "Where was this picture likely taken?", "choices": ["stable", "meadow", "training ring", "house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230060.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32810, "question_id": "3PwGxrEtZW8HgnoyaBGuP7", "question": "What can be said about the zebra's hooves?", "choices": ["wet", "dirty", "clean", "injured"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032810.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107470, "question_id": "3QL7N3ubUMxSHmaaMzZ3LF", "question": "What metal is this likely to be?", "choices": ["iron", "aluminium", "silver", "gold"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107470.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201228, "question_id": "3RHVR2fGjUFySK9inAfzzL", "question": "How many places can a cat get water from here?", "choices": ["six", "three", "eight", "nine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466998, "question_id": "3S78x9EN8K5bYLSgc8nUdH", "question": "Why is the sawdust falling on the elephant?", "choices": ["hiding it", "is accident", "cleaning it", "is fun"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000466998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 132461, "question_id": "3SzED78iYjbq3FgA2GFLtJ", "question": "The toothbrush is powered by what energy?", "choices": ["manual power", "electricity", "battery", "solar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000132461.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296167, "question_id": "3TWSR2LvXWraTYKJTg3of2", "question": "The outer ring will do what if it stays in the same location?", "choices": ["turn red", "fall off", "get brown", "stay white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296167.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276598, "question_id": "3TmVA3jAELtDcWFqJGCwdp", "question": "What word is on the right side of the bus?", "choices": ["thrice", "first", "enter", "vacation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276598.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362245, "question_id": "3Us35ctP7dAA6XZgaD4of6", "question": "What is the profession of the person who arranged these objects?", "choices": ["baker", "chef", "athlete", "florist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362245.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516701, "question_id": "3Vzhcjg8gi657qxc5sfATF", "question": "Why does the bear have a branch in its mouth?", "choices": ["for sale", "cameraman posed", "bring home", "hungry"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000516701.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546578, "question_id": "3bDQcT2gjRyffAA6VUnHZ5", "question": "What is this plane carrying?", "choices": ["passengers", "fish", "cars", "packages"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53867, "question_id": "3eEXr9vSVGni4CbuAUAbym", "question": "What is on the kobo screen?", "choices": ["newspaper story", "novel chapter", "diary", "business letter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053867.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447412, "question_id": "3ebXrvCnVbvacgrpARnP9p", "question": "The small hand on the clock is closest to what number?", "choices": ["six", "nine", "three", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447412.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47895, "question_id": "3eeYSTDFhgeiRPBwbV2Fxs", "question": "Which people group introduced this ritual to the world?", "choices": ["celtic", "german", "egyptian", "romans"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000047895.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133382, "question_id": "3fYJMsGddzgqnqKHyyDjjC", "question": "What is the purpose of the swab?", "choices": ["clogging", "wiping", "smearing", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133382.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260485, "question_id": "3i83G2bnTbxciFfk7w8TkW", "question": "What hygiene is being displayed here?", "choices": ["podiatry", "fasting", "dental", "sterilization"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000260485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153133, "question_id": "3iwSzrqd9m6X3zLq22jBFg", "question": "In which one of these countries might you see this bird in the wild?", "choices": ["sweden", "israel", "mexico", "kenya"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153133.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449884, "question_id": "3jUS535yQf3A6Be4oxwvJa", "question": "Why is she wearing glasses?", "choices": ["operating", "health", "reading", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449884.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473053, "question_id": "3ji7ETVsVA4Z6FU9MKudNH", "question": "Where is the resting place of the object in his hand?", "choices": ["window", "wall", "basket", "desk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499739, "question_id": "3k8inAzYWhrEXQz86GdeRU", "question": "In this sport into the soup means inside the what?", "choices": ["sand", "wave", "foam", "deep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499739.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528296, "question_id": "3n24vTgLfhhSKpV9G5KcT6", "question": "What should he put over the furniture item first?", "choices": ["top sheet", "fitted sheet", "pillow", "comforter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528296.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197565, "question_id": "3pnzh5RMNKGDuhyKDphtUt", "question": "What countries former president is visible on this monitor?", "choices": ["mexico", "canadas", "australias", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125866, "question_id": "3qKrT6hLReL97nirnT3K6H", "question": "What do the animals seem to be in the water?", "choices": ["to cross", "to eat", "to bathe", "to swim"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125866.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442683, "question_id": "3tvVm7odjcFysxVREkcgVi", "question": "Which branch is the bird closest to?", "choices": ["right", "top", "middle", "bottom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442683.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189733, "question_id": "3twX22w2FhT6U9Msy8k5Y9", "question": "What does the E shown here stand for?", "choices": ["east", "emergency", "easy", "enter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189733.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528306, "question_id": "3uM6KPTHqcC5U9Dun6orab", "question": "What are the people standing on?", "choices": ["feathers", "bones", "boxes", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 398000, "question_id": "3uMFLTMr97JxxCmPZquLnk", "question": "Why is this umbrella being used?", "choices": ["costume", "sun", "rain", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000398000.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546182, "question_id": "3v6RcwiJjC2NXeecWuyz9G", "question": "What type leavening was most likely used to prepare this food?", "choices": ["molasses", "vegemite", "none", "yeast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546182.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153147, "question_id": "3wUgWdQJLHtCEsW3wqX5K6", "question": "What style of pants is the woman wearing?", "choices": ["skort", "bell-bottoms", "capris", "skinny jeans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153147.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257363, "question_id": "3xpe8ZQgo6f8UiYDUFVCCY", "question": "Why is the woman laying on the rose covered mattress?", "choices": ["to heal", "to sleep", "to eat", "to scare"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257363.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471720, "question_id": "3yYwYNPG6mzTUZsnux564Z", "question": "What is the young one of the adult bear above called?", "choices": ["calf", "kitten", "cub", "puppy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000471720.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351786, "question_id": "3zFiZ2rkzJn2DoJF6Ksv4b", "question": "What word can be spelled from the letters on the train?", "choices": ["hi", "ad", "red", "go"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351786.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413032, "question_id": "42C6sMzXP6phuWDLQrXRRG", "question": "What is the white clothing item worn by the tennis player above his shoes?", "choices": ["bandage", "cooling pad", "socks", "ankle brace"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413032.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549408, "question_id": "42DhRgkvE7yvnRfouSvqUG", "question": "What activity is the horse on the left engaging in?", "choices": ["walking", "breeding", "sleeping", "grazing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549408.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444807, "question_id": "432shgiFHRN4pL2tB23ZVR", "question": "Which color shoes in the picture are made for women?", "choices": ["black", "brown", "grey", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444807.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 80523, "question_id": "43LVr8ynmZh4d5i7ndzsSf", "question": "What is the cat looking at?", "choices": ["dog", "mouse", "reflection", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000080523.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21274, "question_id": "445yL3FVXn9vXnuHCCUTzD", "question": "What does the Jesus statue reveal about what the might be used for?", "choices": ["museum", "politics", "private residence", "religious purposes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021274.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69854, "question_id": "44iEUxpVVNuv4xmBK7gAzm", "question": "What creature is the little statue?", "choices": ["gnome", "sasquatch", "wendigo", "troll"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000069854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294075, "question_id": "45iak3BiJCF7nKisXZqH7k", "question": "What word can be used to describe this area?", "choices": ["destroyed", "fresh", "dilapidated", "pristine"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000294075.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304700, "question_id": "49qtbhP97hy3s264Memxsf", "question": "What animal's name appears on the sign?", "choices": ["cat", "dog", "falcon", "badger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304700.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152906, "question_id": "4AK4BRCigVDUigrjPjkhf9", "question": "This airline is named after a mythical what?", "choices": ["tiger", "eagle", "lion", "horse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90556, "question_id": "4CEsZ2B7Socrt5gHueSWs8", "question": "What is attached to the other side of the cables that the man is holding?", "choices": ["kite", "boat", "sail", "balloon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090556.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543072, "question_id": "4D5AyAeBBGbSQQZe4UR8AF", "question": "What can be seen on the water?", "choices": ["eel", "stingray", "shark", "boat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39660, "question_id": "4DEsCSgUKA6EfZmDe6vKVy", "question": "This bus is taking on passengers where?", "choices": ["rural", "no where", "city", "suburbs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000039660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66029, "question_id": "4EQtaaP6uNTsJoM7Udj8Px", "question": "What is required for this activity?", "choices": ["rain", "sun", "snow", "ice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066029.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176952, "question_id": "4EUiogawneSZGfVuXiRMQM", "question": "How many squares are there?", "choices": ["20", "eight", "16", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395561, "question_id": "4EvcTjfMeueprHFrGTocwN", "question": "How many people ar surfing?", "choices": ["one", "four", "three", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395561.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539907, "question_id": "4GKESMsK8SBb6vSVExQ9Cd", "question": "If someone needed a Pain reliever tablet where should they look?", "choices": ["behind mirror", "in switch", "under sink", "behind rack"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435881, "question_id": "4Hf7n5YtvsN5aYPQpCGB2F", "question": "What type of fries are these?", "choices": ["crinkle cut", "waffle", "sweet potato", "curly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3907, "question_id": "4HgajS2VP664b9eiqqmnMk", "question": "What time period does the blue bus seem to be celebrating?", "choices": ["80s", "90s", "60s", "70s"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500276, "question_id": "4HpG6btYnAy3ecVzKXqgo6", "question": "The women on the cell phone would have to be what to be legal?", "choices": ["driving", "commuting", "parked", "in traffic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 489499, "question_id": "4Lm47yzxZ4ZC58Pyf9jYrv", "question": "What is the brown desk top neat the person made from?", "choices": ["wood", "glass", "aluminum", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000489499.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431815, "question_id": "4NhR6zrEtQpQ83cxoG96bz", "question": "What state is the city in whose name appears atop the round structure behind the train?", "choices": ["new york", "venice", "buffalo", "illinois"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431815.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259363, "question_id": "4RoLzPvA5rgSG3Ccbf5Gwr", "question": "What would be the best way to describe the area the kites are flying in?", "choices": ["urban", "rural", "downtown", "suburbs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259363.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251760, "question_id": "4S9tgDb7FQWx4MBghw52Cd", "question": "What celebrity would have been most familiar with the language that is on the sign?", "choices": ["anthony quinn", "cher", "falco", "marlon brando"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000251760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 300078, "question_id": "4SFo4KBxoee4r2qVPKiH4s", "question": "How would a person secure this clothing accessory?", "choices": ["buttons", "snapping", "nothing", "tying"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000300078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537147, "question_id": "4TaMGJ35NQKHeNRsiwqtUC", "question": "What material is this dog made of?", "choices": ["polyester", "cotton", "denim", "pic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537147.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365830, "question_id": "4V47SYybzgXKe4DXEhtSUc", "question": "What is crashing in?", "choices": ["monsters", "water", "animals", "boats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365830.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516072, "question_id": "4VQFyXMMniNTZqqSKX3nLr", "question": "What pattern shirt is the person wearing who took this photo?", "choices": ["check", "hound's tooth", "floral", "denim cowboy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267316, "question_id": "4WWicLzyA36uhCxc6w6kke", "question": "If you use this appliance make sure not to use which container for your food?", "choices": ["plastic", "glass", "metal", "ceramic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267316.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557661, "question_id": "4Xnqn8YfRqbwh7chuGcWfd", "question": "Why are there so many giraffes?", "choices": ["travel packs", "is zoo", "posing", "coincidence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557661.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397524, "question_id": "4ZnGtbYRSRdRd2vdQiY24q", "question": "Which one of these substances are these treats likely high in?", "choices": ["salt", "pepper", "oregano", "sugar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466655, "question_id": "4aTFmXB6mXsGQceXwY3qmt", "question": "What species are these stuffed animals meant to resemble?", "choices": ["rodent", "porcine", "ursine", "ovine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466655.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498469, "question_id": "4bsszAq6ZCykrVvZ4CatxX", "question": "What is the dog looking at?", "choices": ["snake", "horse", "apple tree", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262573, "question_id": "4cQTToNQKRBPE7MdxBtfBe", "question": "Where is this bathroom likely located?", "choices": ["restaurant", "hotel", "school", "home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192384, "question_id": "4chWDAJRhUdhYLnKBcgvCx", "question": "What is the green vegetation called that's covering the fire hydrant?", "choices": ["vines", "branches", "bushes", "algae"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192384.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 23728, "question_id": "4eixuZsKQADhZg4cL4ssM6", "question": "What company operates this park?", "choices": ["universal", "disney", "palace entertainment", "hershey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000023728.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 185960, "question_id": "4em6RuLKQk64FWDE8a4ewy", "question": "What is the weather like?", "choices": ["rainy", "snowy", "windy", "tornado"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000185960.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536946, "question_id": "4g9KccKnbbEf6Ja4MdnLme", "question": "Crafts seen here are named for what?", "choices": ["horses", "tugboats", "men", "females"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 266158, "question_id": "4gwhj5AmpMVMV4VEjPhSsr", "question": "If these animals were real what would they be eating?", "choices": ["carrots", "hamburgers", "eggs", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000266158.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97241, "question_id": "4mLok6ArmjXLPKJkcA6UNs", "question": "What is the TRK an abbreviation for?", "choices": ["turkey", "trunk", "trek", "trike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317057, "question_id": "4o3QWgqz7TDq8Gz3oRfLDa", "question": "What internet slang is related to the word at the top of the meter?", "choices": ["ama", "hashtag", "dm", "epic fail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 392902, "question_id": "4q9BmXG7YdKJNPe3Q8dNCa", "question": "Which ingredient is used in the product the man is holding in his mouth?", "choices": ["oregano", "tobacco", "chocolate", "mint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000392902.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74198, "question_id": "4rpiHogCG4gisxDYuYYRmh", "question": "Which part of this conveyance must you manipulate to slow it down?", "choices": ["handlebar area", "wheel", "pedals", "spokes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154570, "question_id": "4rwM7yLHkk2S49QJK943KJ", "question": "The person here will glide along the water top by what power?", "choices": ["wave", "wind", "none", "motor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125611, "question_id": "4s7oFbTkuXA6jzWYHfkda9", "question": "What time was it fifteen minutes before this?", "choices": ["120", "125", "130", "1225"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125611.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336894, "question_id": "4uVLTWVJdGMjx4Ad3DvTrX", "question": "What type of transportation is this?", "choices": ["air", "road", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577094, "question_id": "4ui99nAHcoDZEKHVSdgKWJ", "question": "What color adorns the blue vase?", "choices": ["black", "purple", "pink", "orange"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577094.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227135, "question_id": "4zKA3t2woCPPNL4MzgeWTt", "question": "What color are these items when they are ready to eat?", "choices": ["black", "orange", "purple", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000227135.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443560, "question_id": "53tGp7dvhcHJda4iFcCvej", "question": "Which is the Plus point in the Picture?", "choices": ["grey wire", "red wire", "blackwire", "steel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443560.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222919, "question_id": "54ZEpqDVBYDaVSehVcuQai", "question": "What do birds migrate for?", "choices": ["weather condition", "breeding", "feeding", "habiting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222919.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 116671, "question_id": "54kaz2aJ3Qp5PsogoHRYth", "question": "What animal is the elephant on the right approaching?", "choices": ["lion", "horse", "tiger", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000116671.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184343, "question_id": "55GKaPdpYStjAQLqH7eUXh", "question": "What might the cows drink from if thirsty here?", "choices": ["water bottles", "trough", "river nearby", "stream"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581365, "question_id": "55Qa8RyMymMC8FP4KktGii", "question": "Which train is served during rush hours in the peak direction of Union T'pke?", "choices": ["e", "none", "both", "f"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581365.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96181, "question_id": "56cnGiYqpKbZL7tLfdTE9U", "question": "Which person has disguised where his eyes are looking?", "choices": ["left", "none", "right", "middle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53357, "question_id": "57fu4qBDXjMCPwgxytwCyG", "question": "Which thing is the animal here shown to more likely consume next?", "choices": ["grass", "pecans", "soil", "tree leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053357.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135669, "question_id": "57pRs3F52uiKVQWBNtTAt6", "question": "Why is are the Wii games lighter than the rest of the color on their case?", "choices": ["camera flash", "broken lamp", "spilled water", "bad painting"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000135669.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485159, "question_id": "58DvBgBXtJtZAAeCDVP2VQ", "question": "What environment does a stork spend some of it's time in?", "choices": ["desert", "marsh", "swamp", "mountain top"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485159.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296081, "question_id": "59PQFUpZGEGEyfABi2cCfj", "question": "What kind of location is this kite being flown at?", "choices": ["ocean", "beach", "pond", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296081.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115708, "question_id": "59Wtg6Tbz5YrPqm6B6CMyH", "question": "What other color does this vegetable come in?", "choices": ["blue", "orange", "green", "pink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326608, "question_id": "5Bsk7MvCgx5PXCaxFSwxbK", "question": "How is the girl planning on traveling?", "choices": ["skateboard", "cab", "car", "pogo stick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 398513, "question_id": "5CSnuoG3LvnB6x5EMJ5ZbM", "question": "What is the object the cat is in commonly used for?", "choices": ["planting flowers", "collecting fish", "exercising", "eating soup"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000398513.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277723, "question_id": "5CrurAM97tCbqiz2jJWEpc", "question": "What is the name of this animal in cartoon form in the movie Madagascar?", "choices": ["dave", "andy", "marty", "alex"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562945, "question_id": "5D9HjVqaAPD87JiY2VEbMn", "question": "What extraordinary quality might you find on this animal's face?", "choices": ["eyes", "tongue", "nose", "hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562945.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51670, "question_id": "5DQE7k7hRPi7rsmzFMT6rc", "question": "The name on the towel is the same as the name of a company that makes what?", "choices": ["windows", "wheelbarrows", "eggs", "tires"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051670.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463747, "question_id": "5DiLavr4pygrzh59qFfgFj", "question": "What is one of the older items in the room?", "choices": ["wall heater", "logs", "marble sculpture", "blinds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463747.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 208143, "question_id": "5FN5f9JrEakqL3ptz6nnya", "question": "What do the tassels on the tail of this animal deter?", "choices": ["flies", "gnus", "lions", "plants"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000208143.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 247172, "question_id": "5GohTufPJhLFDKVanFm6o3", "question": "What number of the clock is partially covered by the big hand?", "choices": ["four", "six", "five", "seven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000247172.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549656, "question_id": "5Gwga6YKTii9whRURfmBWH", "question": "Which way is the arrow pointing?", "choices": ["up", "down", "right", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156446, "question_id": "5HRUeVYPayf7LSDkwzcrE5", "question": "What is the person holding on her right hand?", "choices": ["bag", "cape", "briefcase", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156446.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524116, "question_id": "5HtyEoL9R82veAvztNE7Gw", "question": "What is the he boy using to communicate?", "choices": ["phone", "sign language", "postcards", "letter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524116.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263983, "question_id": "5Hv9WWittvhXSo6JDQM3Cw", "question": "What type of creature would normally wear these shoes?", "choices": ["dog", "lizard", "monkey", "human"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263983.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 312566, "question_id": "5KqFYyxmtzuUsZ3NBPFkJJ", "question": "What gesture can the woman in the yellow pants be seen doing?", "choices": ["thumbs up", "middle finger", "shaka", "peace sign"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000312566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503987, "question_id": "5PmvuiC87QqdAvuL4LWynB", "question": "How many cars are visible in this image?", "choices": ["one", "ten", "two", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503987.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253618, "question_id": "5Qhaqg27DpRL5z7yUftVuu", "question": "What part of the wave is the man surfing on?", "choices": ["trough", "foam", "crest", "trench"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000253618.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 335097, "question_id": "5Qz9kDDzY4QFdBkmtkchJe", "question": "Who is this structure meant for?", "choices": ["drivers", "mechanics", "police", "passengers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000335097.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259385, "question_id": "5RM9XVZmefF8ymMkDcQpaF", "question": "What is the color of the cat eye?", "choices": ["yellow", "blue", "golden yellow", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259385.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344571, "question_id": "5RbUkY3SdueFyupENAgUN8", "question": "What is the man's dating status?", "choices": ["married", "single", "it's complicated", "divorced"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153521, "question_id": "5RrjA4Bru4WUokKRXRWnzi", "question": "Which activity has most likely occurred at this table in the past?", "choices": ["doing math", "smoking", "interviewing", "buying stocks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139562, "question_id": "5ToU7MrYhkfyxVvKQbUhF3", "question": "Why does he have his arm out?", "choices": ["reach", "signal", "wave", "measure"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139562.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235860, "question_id": "5TwsGRYpVTaLhaAG3tyiqn", "question": "What vehicle is closest to the umbrella?", "choices": ["airplane", "tank", "car", "bicycle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235860.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235910, "question_id": "5UkFgtxz8GFywGuNkpwWdM", "question": "What is the bear doing?", "choices": ["hiding", "playing", "resting", "foraging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235910.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69708, "question_id": "5XcBD53cdNaRxm8rnGQAQS", "question": "What does this device control?", "choices": ["television", "coffee maker", "car", "stove"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000069708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 228600, "question_id": "5XfSBNCWMGVwpxfkww6RES", "question": "What does the yellow line on the ground tell people?", "choices": ["warning", "lie here", "run here", "step on"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000228600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 287359, "question_id": "5XzQihFFYiqYudmmQHeQUZ", "question": "How is the woman's hair staying out of her face?", "choices": ["wind", "ponytail holder", "hands", "shirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000287359.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399136, "question_id": "5YBbEtKw5NPZJTzg7rJwoM", "question": "What color is the short sleeve shirt of the man in the foreground?", "choices": ["orange", "purple", "blue", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399136.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548817, "question_id": "5ZXuDL3qnMCBmAhQXGtRxu", "question": "What is the giraffe putting it's mouth on?", "choices": ["car", "grass", "tree", "window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548817.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562436, "question_id": "5ajBkWSjzu3SJEuRK3AZmu", "question": "Which donut will leave the most crumbs?", "choices": ["chocolate frosted", "white-glazed", "cereal-covered", "nut-covered"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562436.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576090, "question_id": "5c2auYNaivu29Mj5PKFKBq", "question": "How would one reach this structure?", "choices": ["boat", "car", "train", "swimming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576090.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205001, "question_id": "5cvtgQe5WXk3G9iXBP76aA", "question": "What day of the week is the open house?", "choices": ["wednesday", "thursday", "monday", "friday"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205001.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317314, "question_id": "5djqRRSAtrbtfWArjuDBxz", "question": "What is this appliance used for?", "choices": ["frying", "baking", "boiling", "cooling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317314.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 509206, "question_id": "5eNfV8yykjLTZvYhysWRRZ", "question": "What could cause the train to crash like this?", "choices": ["another train", "derailed", "car", "plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000509206.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541307, "question_id": "5ejD4aQbnVfTt33FDqG2bz", "question": "What is being done to the back wall?", "choices": ["being demolished", "being dusted", "getting painted", "being cleaned"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541307.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481339, "question_id": "5fCvcCHeFe6rskTBwLeDt4", "question": "What portion of the keyboard is the dog resting on?", "choices": ["number pad", "space bar", "caps lock", "mouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415003, "question_id": "5fwHqW5E6o3WPUomeRGiZW", "question": "Why are different company's names on the skier's uniform?", "choices": ["designers", "skier's companies", "fashion", "sponsors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415003.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 522063, "question_id": "5hwaZm54r9r7uM2ESpPema", "question": "Why is the person blurry?", "choices": ["bad film", "is falling", "unsteady photographer", "moving fast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000522063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253423, "question_id": "5i99By8bd9ALL73xMzRxNF", "question": "What are the green signs hanging from the silver pole called?", "choices": ["delay markers", "relay markers", "street signs", "warnings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000253423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275575, "question_id": "5jV8NDgAB9ApxtfM3yx3su", "question": "The animals shown here became endangered because of a past mania for which body part visible here?", "choices": ["trunks", "tusks", "eyes", "ears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275575.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501489, "question_id": "5mL573DPNnSmSYMQ6nvdws", "question": "The three persons visible here await what?", "choices": ["sunset", "dinner", "shark", "wave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501489.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268829, "question_id": "5mzukgrvLgWVo2qhMQx3Cv", "question": "What is the car doing on the couch?", "choices": ["eating", "playing", "sleeping", "fighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000268829.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366467, "question_id": "5pMNJBWxpcsgkdCfzGE4Qt", "question": "What part of the United States is this location in?", "choices": ["northeast", "northwest", "southwest", "east"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366467.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126616, "question_id": "5qkmfnBqBE6PWBfVpskZcG", "question": "What is the man doing on the ground?", "choices": ["posing", "eating", "exercising", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000126616.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230729, "question_id": "5r8WNTaPeX8mTXS6NVeuVw", "question": "What is the brown area on the wall seen on the right building made from?", "choices": ["bricks", "sand", "steel", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230729.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495151, "question_id": "5rYz7cC6mALucuhXaKbT2L", "question": "Who is the drink for?", "choices": ["sheep", "mother", "baby", "nobody"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468887, "question_id": "5sMsyrAZX8cGmqAPn9mNhs", "question": "What can one see when looking at the screen here?", "choices": ["stock prices", "mirror", "e mail", "cat reflection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468887.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102964, "question_id": "5soQhLvyQRX85dfeMbaU4Z", "question": "What is keeping the large animal contained in one area?", "choices": ["barbed wire", "metal fencing", "glass panels", "stone wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102964.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339282, "question_id": "5tBoFQWphUU9wa7Huv4qys", "question": "The animal here hope to smell what?", "choices": ["deer", "food", "perfume", "body odor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384246, "question_id": "5uNknEwA6pzHdRK56XmXvM", "question": "What is a famous type of this dish?", "choices": ["swedish", "hawaiian", "english", "french"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384246.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395126, "question_id": "5vViWB8diLuCr9gk3ypB8r", "question": "What is the toilet seat in the shape of?", "choices": ["seashell", "boat", "horn", "theremin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395126.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 185960, "question_id": "5vgb22Co32MME4ALkNGxG2", "question": "How are these objects controlled?", "choices": ["magic", "computer", "string", "remote"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000185960.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221544, "question_id": "5zNZkfnHMwujaMWcgcAD2m", "question": "What do the zebras here seek?", "choices": ["photos", "food", "selfies", "praise"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221544.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333757, "question_id": "5zf2CJUa6ZpTd4Zrs753U4", "question": "What number is the small hand of the clock is pointed to?", "choices": ["five", "six", "three", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544758, "question_id": "62HDrRXEDr7NZKTcpnvhBZ", "question": "In which country is this bus located?", "choices": ["united states", "germany", "argentina", "mexico"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544758.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251099, "question_id": "62ZcYfsQEEzSyf4toR6JTw", "question": "What type of head covering did this man have?", "choices": ["baseball hat", "fedora", "visor", "cowboy hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251099.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63621, "question_id": "638DXKZXLiJEBEeDQhiJcT", "question": "What is the blue object on the shelving unit?", "choices": ["feather boa", "stole", "scarf", "veil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063621.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 523431, "question_id": "65jrfVeVQGv7nAaS2CJpVc", "question": "These cows belong to the owner of what most likely?", "choices": ["lettuce farmer", "dairy", "meat packer", "no one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000523431.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390045, "question_id": "663iRghdQNr3DUr2AsiCLe", "question": "What item would you operate with the grey item?", "choices": ["television", "car", "lights", "oven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390045.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110479, "question_id": "67SpkUydwvZh98DTP4FEMC", "question": "Which notice would be most visible?", "choices": ["no standing", "dont walk", "walk", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 248442, "question_id": "68SUTMupDenjGbiQSGHWmC", "question": "How many different materials are the benches made out of?", "choices": ["three", "two", "one", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000248442.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234178, "question_id": "68r8tS5EPoShgkLC3fY5v7", "question": "What animal do you think this bird is sitting on?", "choices": ["zebra", "giraffe", "hippo", "alligator"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000234178.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259996, "question_id": "68tzVmNQhHZQs77zWTHsiY", "question": "What type of doll is this?", "choices": ["puppet", "china", "fashion", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473446, "question_id": "6ALkpWxZsQW3swtFJEW6WW", "question": "What is on the bottom of the bear's foot on the right?", "choices": ["smile", "sun", "heart", "year"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473446.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337012, "question_id": "6Ae6xuLbeXb3ZevGLLuZoW", "question": "What is this bear ready to do?", "choices": ["kick", "drop", "pick", "throw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337012.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 48526, "question_id": "6BRYTYFwsFwkmhKnHQFDiM", "question": "The convertible carrying equipment to load onto the jet plane is from which country?", "choices": ["united states", "france", "germany", "england"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000048526.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551017, "question_id": "6Bncc6xzeUPH7gHnWzMvcp", "question": "Which country's flag is on the right-rear of the truck?", "choices": ["germany", "canada", "united states", "france"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551017.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521350, "question_id": "6BvQ45wZ25CPsEbLGBfzpa", "question": "What number is closest to the number of zebras there are?", "choices": ["three", "11", "20", "40"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521350.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467364, "question_id": "6BvU2tAxu5PmjdW43mLVzi", "question": "Which direction is this plane going?", "choices": ["back", "leftward", "upward", "rightward"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382922, "question_id": "6DH6L8nnuBkid9DAAJTxfs", "question": "What is usually in the place of the teacups?", "choices": ["candy", "playing cards", "aardvarks", "numbers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333274, "question_id": "6EB4mHuEFqzv9mUc94ExkB", "question": "In what year did the singer of this song die?", "choices": ["1999", "2009", "2017", "2015"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333274.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 480309, "question_id": "6EFuUGumkFtB2sTLmKyCcw", "question": "What flavor will the fruit add?", "choices": ["sweet", "sour", "spicy", "salty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000480309.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38150, "question_id": "6EevuzJAYGHZxQzqTVjiXX", "question": "Why is the one zebra so much smaller than the other?", "choices": ["malnourished", "is younger", "is abandoned", "is closer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560116, "question_id": "6EiiJkgQp2LQhiypMazToT", "question": "Why is he on top of the wave?", "choices": ["cought there", "better ride", "fell there", "landed there"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560116.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381473, "question_id": "6EmDMdL9UvNcj3LYicfEaC", "question": "What is trailing the bear?", "choices": ["wolf", "snake", "cat", "rat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381473.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511761, "question_id": "6EwxuzpVbnTwa7QuVrZaCV", "question": "What is the tallest structure above the house?", "choices": ["solar panel", "chimney", "flag", "antenna"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511761.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170255, "question_id": "6FYXMtFnTXuvG9FuB74tLH", "question": "What can this utensil be used for?", "choices": ["scoop", "flip", "strain", "cut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170255.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296643, "question_id": "6H2PT2n3hmypPmBtt5Mh6o", "question": "What is the man holding onto while surfing?", "choices": ["boat", "blimp", "helicopter", "kite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296643.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 60231, "question_id": "6KoS8ZXgNvWtTyX9FscJkn", "question": "Why is she feeding the bottle to the dog?", "choices": ["dog likes", "dog thirsty", "for camera", "dog hungry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000060231.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3905, "question_id": "6MpRBQoBxsMkt8eunHCcYH", "question": "What have these sheep most likely been doing?", "choices": ["walking", "eating", "sleeping", "drinking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003905.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245136, "question_id": "6MprjDoexvsq9VAox4m6UH", "question": "To which airline does this plane belong?", "choices": ["united", "continental", "southwest", "delta"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000245136.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389377, "question_id": "6Px9CC2xyByHpZbiMVdMbR", "question": "What group is the dish's meat in?", "choices": ["beef", "seafood", "poultry", "pork"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 219044, "question_id": "6QpAZkEovzxwfF4sq2wgja", "question": "What hour hand is the clock closet too?", "choices": ["one", "11", "12", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000219044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 287750, "question_id": "6Qpfkw6eU29hs9EJjGuPFk", "question": "Gathering of these items is referred to as what?", "choices": ["harvest", "collecting", "poaching", "fishing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000287750.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540631, "question_id": "6RhUbxZP4VkFX3RS73ym7E", "question": "Where is this being served?", "choices": ["gourmet restaurant", "diner", "fast food", "pub"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373680, "question_id": "6Ro9FovrZBCVX5ag4HmRVb", "question": "Which natural material is most prominent in architecture seen here?", "choices": ["tar", "grass", "stone", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373680.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97163, "question_id": "6ShYRnRr4kss4N5Qq7jte6", "question": "Where is the person at?", "choices": ["sky", "water", "land", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097163.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431338, "question_id": "6TY8z8cfBNLfhAS5zRNeDB", "question": "What's wrong with the signs?", "choices": ["wrong language", "they're wet", "nonsensical", "broken"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431338.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547235, "question_id": "6TftfNGb9iyNJjJBuHSYe2", "question": "What can be said about the animal's ears?", "choices": ["floppy", "droopy", "perky", "small"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551968, "question_id": "6XnZNQZt5MWjYZXuBx86e3", "question": "What will come up if the user clicks on the magnifying glass?", "choices": ["virus", "map", "spam", "plane fare"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293381, "question_id": "6XtQvp7PV6ofEDhpRr9wKo", "question": "The items here are controlled by entities located where?", "choices": ["above them", "below them", "drones beside", "no where"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293381.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77459, "question_id": "6Y4Ww3DubwN7VMddhQtHUK", "question": "What tool would most likely be used to assist in cleaning this clock?", "choices": ["pipe", "lift equipment", "concrete mixer", "welding machine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25836, "question_id": "6YysoxQ9jCLzAFHoYkwirN", "question": "What skateboard move is the man doing?", "choices": ["grind", "kickflip", "ollie", "superman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025836.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 405938, "question_id": "6Z54epmeGbZhfxfPd9RbyS", "question": "What does this animal rest it's paw on here?", "choices": ["surfboard", "skateboard", "yo yo", "bike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000405938.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483511, "question_id": "6ZbpeCxjwDchz3AFcVaXjA", "question": "The cat is protected from what possible event?", "choices": ["tornado", "theft", "rain", "dog attack"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254871, "question_id": "6Zd4H7qM6N2mi2gZNbt9pG", "question": "What actress has a last name that shares the first four letters on the sign that come after MS?", "choices": ["michelle dockery", "mia sara", "jane levy", "jessica biel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200388, "question_id": "6avqCXeuhNfAuLNY6rEaZL", "question": "Where should they put this item to preserve it?", "choices": ["oven", "dishwasher", "fridge", "dehydrator"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373505, "question_id": "6bGLdD3fvLezSWi2T2TFyg", "question": "Where is this animal located?", "choices": ["zoo", "circus", "wild", "fair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35051, "question_id": "6cXiWFimvy7nPvwLeFwihB", "question": "What sort of book is the one seen here most likely?", "choices": ["porn", "bible", "encyclopedia", "cookbook"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 568375, "question_id": "6g38gFs6yKjAT2u7ACaeZJ", "question": "What is being used to control the large brown dog?", "choices": ["treats", "leash", "shock collar", "muzzle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000568375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81130, "question_id": "6hYmQ4wWSJuQjstfwBonM6", "question": "Which of these is closest to the ground?", "choices": ["gloves", "helmet", "shoulder", "knee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081130.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51870, "question_id": "6hxnRPdcnmx5CbRqBQy6ri", "question": "What is required for this activity?", "choices": ["fire", "ice", "snow", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051870.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499022, "question_id": "6iYqS3mVL6FwVgM9mGUUNN", "question": "What kind of vehicle is driving this train?", "choices": ["caboose", "boat", "locomotive", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499022.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337764, "question_id": "6kEww9MgyCgs9ai4st7tz2", "question": "What is the horse doing?", "choices": ["hiding", "resting", "attacking boy", "pulling boy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337764.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 43259, "question_id": "6u992K97HteN3bdQ6X8djF", "question": "What is he doing?", "choices": ["slipping off", "stopping board", "posing", "falling down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000043259.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45797, "question_id": "6ukhWEACT6JyZuHEviCwaS", "question": "What do people associate this room's name with in a house?", "choices": ["study", "kitchen", "living room", "bathroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045797.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391387, "question_id": "6uxs5zqwDV2Nyp3tHzXYFe", "question": "What is keeping the remote in the air?", "choices": ["tape", "hand", "nothing", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391387.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250627, "question_id": "6xsoBRK22DkTR4xpha7FPt", "question": "What type of sweater would you wear with his hair?", "choices": ["cotton", "silk", "polyester", "wool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000250627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462299, "question_id": "6ydWjaAwHkKVvxX8DoHAAR", "question": "What are the items above commonly made of?", "choices": ["none", "loam", "sand", "clay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462299.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292006, "question_id": "6zYjrbJuRhCKm6fgvCCQk2", "question": "Horse tail is adequately made up of?", "choices": ["fiber", "tissues", "protein", "iron"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000292006.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 266733, "question_id": "6zrtuKTz9qJtWj8AeAmNtC", "question": "What type of area is shown?", "choices": ["arctic", "commercial", "residential", "coastal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000266733.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395808, "question_id": "72syLJxPciyVoCAERLCs4K", "question": "The item in the blue container will clean where?", "choices": ["between teeth", "between toes", "under arms", "back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395808.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192946, "question_id": "73gprKFmv5PUxqTxjaFHVF", "question": "What type of animal is on the bench?", "choices": ["rabbit", "dog", "cat", "squirrel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156198, "question_id": "749PZzbb9raknRENtTzJKg", "question": "How does the grass feel?", "choices": ["dry", "dead", "wet", "frozen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499380, "question_id": "75TTBTRyrgvnsYFKLtYhkc", "question": "Which body of water are they entering?", "choices": ["lake", "pond", "ocean", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499380.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461047, "question_id": "75fTzzZMRS7SDP4DbnLddt", "question": "What type of shot is the woman about to hit?", "choices": ["backhand", "dropshot", "serve", "forehand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461047.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190005, "question_id": "76DTMxHCeKFHWyfp44GRHE", "question": "What food dish might this animal become?", "choices": ["chicken tenders", "rice", "chocolate cake", "pork chop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190005.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283098, "question_id": "76qX2itcLbSxCUtTjPCAkw", "question": "What healthy habit likely happens in this room?", "choices": ["hygiene", "exercise", "sleep", "diet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283098.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441279, "question_id": "78Za9dKfsSp7TgbkPDGUv8", "question": "Which one of these clothing types would be out of place in this scene?", "choices": ["wetsuit", "speedo", "tuxedo", "bikini"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441279.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381451, "question_id": "78ZocZhLPC4GvaCsZ669WV", "question": "What is the homeowner especially fond of?", "choices": ["windows", "drapes", "vases", "candles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381451.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211814, "question_id": "7967qVrBnvy9JKPfEMGfSM", "question": "Based on appearance how difficult has the elephant's life been?", "choices": ["fun", "relaxing", "easy", "hard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211814.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92073, "question_id": "79gPtLgQY5Vy9FNHkAhoH8", "question": "Why are the birds flying in different directions?", "choices": ["being attacked", "are fake", "confused", "hungry"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000092073.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59018, "question_id": "7ANXXSzGUvDLQtot638Hq3", "question": "What type of sign is shown?", "choices": ["traffic", "brand", "historical", "price"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059018.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478703, "question_id": "7BELPCojQbXdD7ecDPJvYo", "question": "What effect might the sink have on the animal shown in it?", "choices": ["upsetting", "derivative", "irritant", "cooling"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000478703.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137565, "question_id": "7BScbsmHTpdUyYqsw5GHzx", "question": "What do you need to do to answer a call on this phone?", "choices": ["touch screen", "smack front", "pull antenna", "flip open"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450986, "question_id": "7Bqp7kKLFFZdPQdwz3JDA5", "question": "Who claimed to be the first to make this type of pizza?", "choices": ["sam panopoulos", "julia child", "john schnatter", "gordon ramsey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000450986.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170009, "question_id": "7D62gkvtBL6tCxjv8k8E8r", "question": "What can be said about the photographer of this image?", "choices": ["threatening", "spotted", "endangered", "hidden"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000170009.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485104, "question_id": "7DLQ5qJ3NZAZJVQEMxvyvn", "question": "What is one obvious cause of his homelessness?", "choices": ["mental issues", "foreclosure", "illness", "alcoholism"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 287271, "question_id": "7EFu9Zw4xYeGbR2iU94J35", "question": "Who is the author of the children's find the hidden character book that has the same name as the name on the sign?", "choices": ["martin handford", "kim richards", "james d'arcy", "jay thomas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000287271.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490807, "question_id": "7FFH2NzAzHchm8CjMkWodP", "question": "How might the smaller bird most likely be related to larger?", "choices": ["parent", "sibling", "chick", "strangers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490807.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232798, "question_id": "7Gv3PNQg6xexCDSJUBeWXx", "question": "What object is in most danger of being pushed off the bed by a cat?", "choices": ["laptop", "grocery bag", "power cord", "black bag"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439953, "question_id": "7J2APnpu9ycMvXxVLrsy6t", "question": "What is this tower used for?", "choices": ["time", "water", "light", "cell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439953.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535149, "question_id": "7KKuphvR5ZVHkCb9Fdgfxa", "question": "Which human language does this animal most likely understand fully?", "choices": ["chinese", "none", "korean", "english"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535149.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333027, "question_id": "7KT236iDTJ7AYDcTZZDH7U", "question": "What company made the top row products?", "choices": ["amazon", "ibm", "apple", "microsoft"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333027.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521358, "question_id": "7Kt4yk3r9XQouC75kcwsp4", "question": "What is the animal here most likely to do that might injure the person near it?", "choices": ["flick ears", "nothing", "run", "kick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4127, "question_id": "7LZULuvjJbRsD5ATLjqLkW", "question": "The numbers are displayed how?", "choices": ["roman numerals", "hieroglyphics", "kanji", "cuneiform"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209612, "question_id": "7QkREaBZ45oCr9bFcHowRG", "question": "How will this person get home?", "choices": ["ride horse", "uber", "walk", "train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209612.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376860, "question_id": "7QuN8nEoMSLRnb62qbzjrY", "question": "In what building is this bathroom located?", "choices": ["gas station", "museum", "motel", "private home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376860.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175578, "question_id": "7Tbx49oXRyY7kCCQw8ZZCu", "question": "Where is the red bus going to stop next?", "choices": ["community center", "state park", "arriva road", "becontree heath"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342053, "question_id": "7UQuWQBzSCTiptMwWTZ2LP", "question": "Which room is directly opposite this room?", "choices": ["porch", "bathroom", "kitchen", "bedroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129252, "question_id": "7VAUUdnYbVTjT9KKpsQLAX", "question": "Which animal is in least danger of being eaten by humans if caught in the wild?", "choices": ["goat", "all equal", "zebra", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000129252.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351821, "question_id": "7Wyq9WrPaNv8fW4HesrrKi", "question": "What type of transportation is shown?", "choices": ["rail", "air", "water", "land"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351821.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341211, "question_id": "7YPL5Df9ouWQjdsXsDKVPu", "question": "What does the yellow part of the sign tell drivers about this part of the road in relation to the rest of the highway?", "choices": ["merging into", "slow down", "going up", "being dropped"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000341211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277754, "question_id": "7YwRz8Vtizw289As2TxCd4", "question": "The larger inflatable is meant to look like what?", "choices": ["air mattress", "dog", "fish", "human"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277754.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195242, "question_id": "7awYMSnHMzkdXJYgF2JiFx", "question": "How many people in the body of water are filming the surfing?", "choices": ["none", "two", "one", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195242.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201730, "question_id": "7b44Gf8KEeHRZwALVUhJmd", "question": "What country has been assigned the telephone country code that is the same as the number on the train?", "choices": ["netherlands", "india", "seychelles", "united kingdom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201730.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100360, "question_id": "7bfbEARmVLDm5nogAdwZ2L", "question": "What role does the dog play in this relationship?", "choices": ["pet", "predator", "prey", "master"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100360.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449943, "question_id": "7cE2YXXzSm3f8SknfP3pYG", "question": "Why does the giraffe has his arms apart?", "choices": ["balance", "reach", "break fall", "gesture"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500308, "question_id": "7e5XmvSZnMGUakR5cqCkGX", "question": "Where are the cups located?", "choices": ["no cups", "in fridge", "outside fridge", "on fridge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196767, "question_id": "7evH3TEHJ2zrNFi3oS9UiM", "question": "Why is there a number on his chest?", "choices": ["in line", "meeting", "competition", "race"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25324, "question_id": "7fJqUBLhYFUSEZqobyYKLM", "question": "The bird eats something here that was placed here by what?", "choices": ["humans", "trees", "pigeons", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025324.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 237311, "question_id": "7hNLZaApd9SZygtAsNTFr6", "question": "What type plant produced the yellow items shown here?", "choices": ["rose", "wheat", "none", "bamboo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000237311.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230719, "question_id": "7iqsUP5PsRbzgUeN36WTrW", "question": "Who operates this train?", "choices": ["amtrak", "cp rail", "union pacific", "via rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230719.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326444, "question_id": "7j3eCow6idvqgMuTGqrikc", "question": "What is the typical name of this kind of concrete area?", "choices": ["pool", "walkway", "square", "park"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326444.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76435, "question_id": "7kYYs4rtvuUybwuBdcKCmH", "question": "Which country's citizens eat the most of this dish per person yearly?", "choices": ["italy", "united states", "canada", "norway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202014, "question_id": "7mTfwa28g7Bpk3zAMC7EWC", "question": "His facial expression can be described as what?", "choices": ["scowl", "smile", "surprised", "blank"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000202014.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20674, "question_id": "7owjjPhjdExBC5wirV7LDo", "question": "What is the company that makes this item known for making?", "choices": ["donuts", "computers", "slippers", "automobiles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020674.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105013, "question_id": "7pytfvSb3MraKMSTsEQA43", "question": "What is she brushing?", "choices": ["hair", "teeth", "toes", "nose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 261408, "question_id": "7sWMRiHNU8PLKTx7XUB7zx", "question": "What is on top of the treat?", "choices": ["candy hearts", "gummy bears", "salsa", "chocolate chips"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000261408.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 284745, "question_id": "7tZfpCd7jSTX7Yxen7624F", "question": "The animals here go near the water for what purpose?", "choices": ["to fish", "swimming", "drinking", "diving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000284745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158925, "question_id": "7tmoospxrAFBgk7M2Z9Apu", "question": "What do you call the objects holding the sandwich together?", "choices": ["toothpicks", "knife", "tongs", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231070, "question_id": "7vjB44B99NvQ3XD7ZsqwhR", "question": "What country does the words on the side of the train indicate it is in?", "choices": ["australia", "japan", "china", "korea"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100421, "question_id": "7wHZu5EYnNPh6F9hev9ebK", "question": "What is in the white glassware?", "choices": ["soda", "berries", "cherries", "sauce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100421.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 437886, "question_id": "7zS2J5MyQCjVV98RwC6DCk", "question": "What is lighting the man?", "choices": ["lamp", "sun", "streetlight", "spotlight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000437886.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 67922, "question_id": "82U5fgndfnqwSgUyBGmt4z", "question": "What is being used as the base of the sweet treat?", "choices": ["cookie", "cone", "bread", "waffle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000067922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453234, "question_id": "82oK7bnCY3QkDRNhAKYM5D", "question": "This company is named after the what of its founder?", "choices": ["home town", "sister", "last name", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453234.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257095, "question_id": "84sbRuqBNCigBJS9dM3uRc", "question": "What is the cup made from?", "choices": ["glass", "paper", "plastic", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257095.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527849, "question_id": "86jGdwjN5jHrG3Rkexje2N", "question": "Besides pipers what other instrumentalists will be present at the competition?", "choices": ["violinists", "trumpeters", "pianists", "drummers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527849.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163781, "question_id": "86nRLR4f3ozBMShocWPAuY", "question": "Which one of these is a variety of the white item?", "choices": ["lima", "basmati", "shallot", "cauliflower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351674, "question_id": "87ciTdwJjKmw36JL5CUu86", "question": "What is the same color as the woman's hair?", "choices": ["ant", "smurf", "cherry", "banana"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351674.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119823, "question_id": "88QWttAxbEqhp9pwbUz8hZ", "question": "What kind of animal is shown?", "choices": ["wild", "aquatic", "domestic", "reptile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417066, "question_id": "89QZyh86dVjEJq2Jdm2gzu", "question": "What type of fixtures are these?", "choices": ["plumbing", "lighting", "milling", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2161, "question_id": "89YphRxistoafFSrw2Kgee", "question": "What weather is necessary for this sport?", "choices": ["rain", "thunder", "wind", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432087, "question_id": "8AKXFRfwYXytgxWiAFPKJx", "question": "What type of transportation is shown?", "choices": ["rail", "water", "road", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 323579, "question_id": "8BJ7j3eRxAK8S2NTKRUMJc", "question": "What is missing from the white object?", "choices": ["rug", "footstool", "decoration", "cistern"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000323579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198351, "question_id": "8BV4Tdg5M9B6biA8whU2xX", "question": "What form of clothing is the surfer using?", "choices": ["speedo", "swim trunks", "wetsuit", "bikini"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198351.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83944, "question_id": "8CfZuPghecuSTwPWMY69H6", "question": "What did he probably eat for lunch?", "choices": ["carrion", "rodents", "fish", "leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083944.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66823, "question_id": "8DrpZvk3ZUrfEN4biGQxzZ", "question": "What features regarding coloring makes it most clear that this is an older animal?", "choices": ["darkness", "mane", "spot size", "lightness"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000066823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138903, "question_id": "8FyWLQViLfP7Gxm4nw4hj3", "question": "What type of animals might these creatures like to bite?", "choices": ["humans", "donkeys", "mice", "zebra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138903.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334443, "question_id": "8GjSBZ3CpRFFkRXiSRMDBF", "question": "What brand bus is shown?", "choices": ["luthfansa", "mta", "jet blue", "sun metro"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467014, "question_id": "8JBK2T44qUb5NWio7bibp4", "question": "What is visible on the bed?", "choices": ["remote", "baby", "cat", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467014.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442423, "question_id": "8NX92Xz4cJvLD6MFHHyRE7", "question": "This style of curtain is referred to as what?", "choices": ["for chintz", "sheer", "damask", "canvas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411106, "question_id": "8QvxyF7dBPQQ5Bpo4P7656", "question": "What other activity besides surfing could the surfer's tool be used?", "choices": ["paddle boarding", "kite surfing", "para sailing", "skim boarding"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000411106.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138687, "question_id": "8RKPCkwAzzv9qbF2xp2Mwx", "question": "What brand is the woman's surfboard?", "choices": ["liquid shredder", "quicksilver", "roxy", "pyzel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138687.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21341, "question_id": "8RRap43RjcLFJNr5ckm6wd", "question": "Where are these animals located?", "choices": ["water", "zoo", "circus", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021341.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 492818, "question_id": "8SkPsufBF9AKZXUj5oVdvx", "question": "What animal is closely related to this?", "choices": ["tiger", "anteater", "wolf", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000492818.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572158, "question_id": "8SsS2v79aFT5Q6tMUYSCcD", "question": "Who would the bench be most suited for based on the height of it?", "choices": ["kids", "elderly", "teens", "adults"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572158.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218826, "question_id": "8VfAAHmLu4JYgvJWmpfxnz", "question": "What is this fixture used to access?", "choices": ["electricity", "water", "disposal", "assistance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218826.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402571, "question_id": "8WJ2QTMfjEBrGKNJHZfhwE", "question": "What are the women wearing around their necks?", "choices": ["tie", "lanyard", "lei", "necklace"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400995, "question_id": "8WY3VMZETkMrau34eSz8yg", "question": "Why does the figure on the left have wings?", "choices": ["butterfly", "angel", "bird", "airplane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400995.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545881, "question_id": "8XBxZG3CmwXDqWNuQy6gkt", "question": "What helping aid for a disability does this man have?", "choices": ["walking stick", "glasses", "hearing aid", "zimmer frame"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290805, "question_id": "8XeeZJXfi4gDb3LhGgkvjw", "question": "What temperature is the beverage being served here?", "choices": ["hot", "lukewarm", "frozen", "cold"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290805.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123111, "question_id": "8XoSFaSkPRMgmTEADbKkt7", "question": "What transportation vehicle is this kid's version modeled after?", "choices": ["airplane", "train", "car", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528247, "question_id": "8ZgQ948d7B4W7jgthYEQ6Z", "question": "Entering a short door would cause this animal to do what?", "choices": ["run", "reverse", "nothing", "duck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528247.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188227, "question_id": "8aQvUvWuDc46X9vuQbiHj8", "question": "What is the color of the vessel pictured above?", "choices": ["green", "white", "colorless", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188227.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254094, "question_id": "8cLWuDKDrmvaqobHHub4TW", "question": "What can be stored inside these?", "choices": ["guns", "clothes", "food", "tools"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254094.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63534, "question_id": "8dEZCUgurmCefWLovn5jUZ", "question": "Whose hat does this cat wear?", "choices": ["his", "it's owner's", "madelines", "it's mothers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063534.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 423457, "question_id": "8dPvvzfDWfBir9nnKJeqPE", "question": "Dark condition is due the absence of what?", "choices": ["neutron", "proton", "electron", "photon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000423457.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179582, "question_id": "8dh7cPkTpCGsq75y8Bdfny", "question": "What will he fall into?", "choices": ["grass", "water", "dirt", "leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543283, "question_id": "8fC3wyBZS6Z22F4bps4EVo", "question": "What gives the appearance of two elephants?", "choices": ["rhino", "reflection", "grey rocks", "forest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330783, "question_id": "8fw549nnNXCicesiLxLKvn", "question": "What type of hat is the man wearing?", "choices": ["chef", "fedora", "baseball", "skull cap"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 134230, "question_id": "8gqQjaKvtZ7XKtHz5HWxwn", "question": "Why is this person using an umbrella?", "choices": ["snow", "wind", "sun", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000134230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 18269, "question_id": "8h8as9FgTfLCDN9YLEhBCg", "question": "What kind of weather would be at this location?", "choices": ["arid", "cold", "hot", "humid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000018269.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535518, "question_id": "8hYsy9eejkxfYH8db7hmxf", "question": "Which one of the following modes of transport would be faster than the ones in the photo?", "choices": ["bicycle", "car", "skateboard", "plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535518.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170823, "question_id": "8iCqxcQJfX8LYRjEJMPHAJ", "question": "What card game is being played?", "choices": ["poker", "go fish", "solitaire", "blackjack"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553178, "question_id": "8ijFurzRWtBTfVMxw55hEU", "question": "What does the giraffe seek here?", "choices": ["mate", "grass", "water", "drugs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553178.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285658, "question_id": "8jDWTCcy93oX5NXSFRhFYE", "question": "What is in the background?", "choices": ["mountain", "donkey", "cat", "baby"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285658.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551984, "question_id": "8jRCKaNkWrykwrYHWzU3hx", "question": "What furniture are these bears located on?", "choices": ["sofa", "table", "bookcase", "desk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551984.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407258, "question_id": "8nReuWN8AcZuH3iiwPQAwJ", "question": "Where is this bathroom located?", "choices": ["library", "home", "store", "office"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000407258.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334661, "question_id": "8oP8Y5uui8FTaT3Frrs2w9", "question": "What is the removed fixture used for?", "choices": ["cooking", "washing", "exercise", "watching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334661.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118701, "question_id": "8oerdoDMvaUWMV3jn4LgMB", "question": "What brand is the airplane?", "choices": ["virgin america", "southwest", "frontier", "american"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118701.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504459, "question_id": "8ofnjkiTG4GHY2pUszznTp", "question": "What is the process of grooming the yellowish green plant to the right called?", "choices": ["mowing", "grafting", "pruning", "chopping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458658, "question_id": "8qNX3RQiEtyJHumrPmPRnd", "question": "What is likely on the other side of the doors?", "choices": ["bathroom", "backyard", "front yard", "kids' room"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458658.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 240809, "question_id": "8rktvtyut8TLJ52q8eG9YT", "question": "What is the purpose of the signs above the vehicles?", "choices": ["visually appealing", "advertisement", "political promotion", "identification"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000240809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96767, "question_id": "8tA5yGJt7EFQfdwfT9GppY", "question": "What language is the sign at the top in?", "choices": ["german", "english", "spanish", "french"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4778, "question_id": "8vFnWiuNmM3kFYqx5hrRt7", "question": "What is covering her right eye?", "choices": ["phone", "hair", "hand", "patch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460720, "question_id": "8wJguKFj3LUpxrTJ2t9P5Z", "question": "If someone wanted to soak their feet where might they comfortably do it here?", "choices": ["sink", "bathtub", "shower nozzle", "toilet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460720.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541890, "question_id": "8wysSwL6NzktB7kxiivngP", "question": "What is unusual about the alligator?", "choices": ["back", "feet", "eyes", "teeth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541890.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551218, "question_id": "8xYTQPHZqoHqkeHD7CfKPg", "question": "What country is this bus in?", "choices": ["united states", "japan", "canada", "france"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4031, "question_id": "8yJvYQsVRuCZpJJosFG7QF", "question": "What would be the reason a company would produce this clock?", "choices": ["money", "hospital necessity", "advertisement", "schools"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004031.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2773, "question_id": "8yN9PauN6ii2uGyUtVpbDw", "question": "What type of woman is this animal traditionally associated with?", "choices": ["nun", "midwife", "prostitute", "witch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002773.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292039, "question_id": "8zWpTXF7oWNj58hBhKfGfR", "question": "How many of these animals have feathers?", "choices": ["three", "four", "five", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000292039.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435520, "question_id": "92N3jMf96xHWgs3fUoMCQH", "question": "What does the digital sign indicate?", "choices": ["present location", "weather", "ticket pricing", "next train"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000435520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578234, "question_id": "93tV5BErRvbipYeSTSMxCD", "question": "What type of transportation is shown?", "choices": ["land", "air", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578234.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317760, "question_id": "94BBGHJjoSFKhTKGDqdWwD", "question": "This dog's owner plays what sport?", "choices": ["none", "tennis", "racquet ball", "baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186754, "question_id": "94Shi5jkBjRGEu6TK4Cy6K", "question": "What is leaning on the front of this TV stand?", "choices": ["rug", "person", "guitar", "couch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186754.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36154, "question_id": "964KDemGhEGUJ3vXP5qxt7", "question": "What sports team is she a fan of?", "choices": ["trailblazers", "mavericks", "bulls", "lakers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114205, "question_id": "97B22sqVCmuXSBzRH3J28z", "question": "What is needed for this activity?", "choices": ["wind", "snow", "ice", "waves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114205.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532453, "question_id": "98KxXEbkqavW4qW4Cb7RYS", "question": "Why would someone sit at this table?", "choices": ["to sew", "to eat", "to meet", "to wok"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532453.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40976, "question_id": "99xJ6TmvJw4hCK4XMyXK4P", "question": "What does the NS on the side of the train represent?", "choices": ["norfolk southern", "north south", "nova scotia", "notway southby"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040976.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122248, "question_id": "9AuQ3iVpMqoHLiKKtp8X73", "question": "Why is this cat riding on top of this float device?", "choices": ["avoiding sun", "eating", "avoid water", "sleeping"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000122248.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 141680, "question_id": "9BfJAoQy5fskchUT4eXmJW", "question": "Skull logo indicates what?", "choices": ["player", "danger", "swimmer", "predictor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000141680.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136080, "question_id": "9BoR4ua7GoPsqN8vcfCyxm", "question": "On which side of a Quarter is this animal's likeness most likely to be visible?", "choices": ["side", "front", "back", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136080.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151663, "question_id": "9C5UJL5tZgGzk7AfTw32uG", "question": "Why is her head bowed?", "choices": ["meditating", "praying", "sleeping", "hiding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 508461, "question_id": "9F24h4EHhAhLTSsiDBbWpb", "question": "What type of building is shown?", "choices": ["courthouse", "skyscraper", "home", "boathouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000508461.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545662, "question_id": "9HnPMjTqVKmapaARLUGewM", "question": "How many people can get a sandwich?", "choices": ["two", "one", "four", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545662.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179955, "question_id": "9HrDdnTNsjDL4yjRetSW9w", "question": "Why is the person tying a rope on one side one of the leg?", "choices": ["swag", "pride", "luxury", "safety"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179955.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385282, "question_id": "9MLxQL6ggFX5gMYrA4Uk8E", "question": "The bedroom is located within what type of structure?", "choices": ["hotel", "charter bus", "ship", "plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428422, "question_id": "9QzoYXN6vBJg4nm4mnFRSo", "question": "If one continues walking which street will come up next?", "choices": ["w. 73", "e. 75", "e. 25", "w. 75"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428422.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311, "question_id": "9SdGzRkC4Va4hz8YYC8zn8", "question": "What is the tall thing next to the building?", "choices": ["street light", "giraffe", "moose", "ladder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000311.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25678, "question_id": "9TaDHKhjjcNAb2CEviWSiL", "question": "Why has the fruit been placed inside the glass container?", "choices": ["to cook", "to sell", "to wash", "to protect"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025678.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 580710, "question_id": "9UBP9Gj5o9qXLzgsYdJgmG", "question": "What is sitting next to the toilet on the right?", "choices": ["dishwasher", "garbage can", "sink", "dryer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000580710.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32086, "question_id": "9UKqqzqshYnqu6yn4a8SU6", "question": "What would someone here need to view Castle?", "choices": ["wii", "dvd player", "antenna", "vcr player"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032086.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285830, "question_id": "9UPNBPtvuCgwbEiDq3NHyJ", "question": "Which one of these colors does this flower come in naturally?", "choices": ["polka dot", "blue", "red", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285830.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532223, "question_id": "9WRNQPKDFKzdwSuznenM2L", "question": "What prevents random people from climbing over to get into the airport here?", "choices": ["nothing", "warning signs", "concertina wire", "staked fence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532223.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291246, "question_id": "9XMhiTa2PGGucYRCURnVHu", "question": "What national sports league's logo is on the side of the train?", "choices": ["nba", "nhl", "mlb", "nfl"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291246.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 480126, "question_id": "9Z5TUtVe7HqfBY4SBvmxi3", "question": "If another motorists is entering this intersection what should someone driving in this lane in this direction do?", "choices": ["nothing", "drive fast", "wait", "reverse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000480126.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329365, "question_id": "9Z9DSPyGUt6hzQV843DB8i", "question": "What is the person using the electronic device to do?", "choices": ["brush hair", "open door", "power television", "make call"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329365.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565063, "question_id": "9aA6L29VMRs7xoUKKUeVqn", "question": "What does the animal have an abundance of?", "choices": ["feathers", "hair", "wool", "talons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565063.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49241, "question_id": "9ckEvw7yskMxe5nqfEehhM", "question": "What caused the holes in the apple?", "choices": ["stone", "beak", "skewer", "toothpick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254871, "question_id": "9d22phBUNXirNsczMfqadQ", "question": "What is similar to the item the woman is holding?", "choices": ["parasol", "spoon", "chainsaw", "pitchfork"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532453, "question_id": "9eDNzibNvf4rSLYzug6S8n", "question": "What preparation did the meat shown here most likely get?", "choices": ["none", "deep fried", "pulled", "raw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532453.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256925, "question_id": "9f4yGD3buCsRKSjZAUa8XB", "question": "What song refers to the color of this item with the chain on it?", "choices": ["purple haze", "red war", "yellow submarine", "blue moon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000256925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89028, "question_id": "9fPxt8r9CwsftFzyyQbmMs", "question": "What sport does the team on the woman's hat play for?", "choices": ["football", "baseball", "hockey", "soccer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089028.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 42660, "question_id": "9iQqqTecRrrR8rUzTsU6z7", "question": "What does the batters stance indicate about how ready he is for the pitch?", "choices": ["waiting", "very ready", "somewhat ready", "ready"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000042660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290822, "question_id": "9jAcWgfxn2GrL7of6siCpD", "question": "What is the snowboarder in the blue pants likely looking at?", "choices": ["sky", "airborne snowboarder", "hill", "own snowboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290822.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153995, "question_id": "9jsSFSci8kputbBDcr6auM", "question": "What type of sound does this animal make?", "choices": ["chatters", "barks", "roars", "hisses"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153995.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 375010, "question_id": "9kNvSJiFEtgQN2coZfDsH3", "question": "What is the landscape like behind the giraffes?", "choices": ["rain forests", "desert", "plains", "mountains"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000375010.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123385, "question_id": "9mxCoUWeHT5sfQLdktZ8Zf", "question": "Where are these kids?", "choices": ["kitchen", "family room", "bedroom", "bathroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123385.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447072, "question_id": "9nTxbiV3EXwMVNA9d87XGo", "question": "What does the 40 on the sign tell drivers?", "choices": ["cows present", "cost", "minimum speed", "max speed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253625, "question_id": "9oCc7xEWtt2JWAt2WLTePF", "question": "What is she ready to do?", "choices": ["serve", "juggle", "dunk", "dribble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000253625.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512693, "question_id": "9qpcXcBVcpcq6wcSnQDudL", "question": "What is he eating?", "choices": ["doughnut", "brownie", "cupcake", "cookie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512693.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360946, "question_id": "9qtyBVxSyozPCFxe954YWW", "question": "Where is this train located?", "choices": ["germany", "england", "spain", "usa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000360946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 216774, "question_id": "9sbQB9psKgE5jYr3Ywt8iy", "question": "The odd aspect of this picture is which quality?", "choices": ["reflection", "eye color", "brick", "color"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000216774.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 246556, "question_id": "A65R8EZ5Y8nHaAa59Hj9NS", "question": "What is the red object on the street connected to?", "choices": ["water line", "police station", "land line", "telephone pole"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000246556.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541064, "question_id": "A6EFcvmdP7EMYnJfs29SCR", "question": "What is the skier doing on the rail?", "choices": ["grinding", "posing", "resting", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485482, "question_id": "A6vXisFsG4fcEXev8QsEMR", "question": "What part of a surfboard is this person's feet at?", "choices": ["tail", "deck", "rails", "nose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366265, "question_id": "A7mrSVVX2ydHzcejturrHw", "question": "What does the eyewear protect this skier from?", "choices": ["snowballs", "bugs", "acid", "snowblindness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 476994, "question_id": "A7oR5cb9SXvS3WsUE9TqPA", "question": "What propels these skiers forward here?", "choices": ["nothing", "skiing", "magic", "pulling ropes"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000476994.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209502, "question_id": "AAhiCCeM3rGbLo2J5y3nzZ", "question": "What is sold in the box by the building?", "choices": ["soda pop", "cigarettes", "chips", "newspaper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209502.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 60067, "question_id": "AAutDQF7KYXE8j4ZV3kxcY", "question": "Which quadrennial event is portrayed by this conveyance?", "choices": ["olympics", "none", "coke festival", "superbowl"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000060067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 526499, "question_id": "ABTDXUSKySzgVXAjTsmLhy", "question": "This animal is about 13 and a half what tall?", "choices": ["yards", "inches", "meters", "feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000526499.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299222, "question_id": "ABnMQLnfVhEAhTKtVSpq5j", "question": "What were the first surfboards made of?", "choices": ["clay", "wood", "banana leaves", "straw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299222.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345945, "question_id": "ACDWo4pKnqh9RpBoxD6cEp", "question": "What is the profession of this woman?", "choices": ["barber", "athlete", "chef", "baker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345945.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 143923, "question_id": "ADVSjHfTwbXjmHXg7RRx3G", "question": "What is this walkway made out of?", "choices": ["diamonds", "glass", "tiles", "tar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000143923.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64956, "question_id": "AEhu7SsvSe6TA5ComYZUVV", "question": "Why is he on back of the board?", "choices": ["confused", "to balance", "cutting waves", "falling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000064956.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377151, "question_id": "AEsXfR9ZyRW3TKvam8YQhm", "question": "What object is present on the man's hands?", "choices": ["handle", "gloves", "wetsuit", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295826, "question_id": "AFTsEpBXo6rMyHgnYP3PKD", "question": "What gives these sunflowers a reddish tinge?", "choices": ["colored liquid", "genetics", "milk", "clear water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000295826.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39023, "question_id": "AGnMUVUPo7VLvV7xqxHYDv", "question": "How many times a week are people encouraged to use the blue item?", "choices": ["one", "two hundred", "fourteen", "twenty"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000039023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21729, "question_id": "AHrjGm89saGAKMixV2iqEV", "question": "What is most like what the boy is waiting for?", "choices": ["love", "rain", "his bunny", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021729.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76241, "question_id": "ALjESzxejWqtTxmzfwTMbT", "question": "Why is the man wearing a yellow jacket?", "choices": ["fashion", "visibility", "camouflage", "costume"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162980, "question_id": "AM4pzKastPR2Hp2o47QAGs", "question": "What is a common play in this sport?", "choices": ["kickoff", "walk", "icing", "goal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162980.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446487, "question_id": "AMk24XD8XDamR9g2MmR42g", "question": "What is the sink at the top used for?", "choices": ["washing hands", "washing food", "bathing", "washing clothes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446487.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384505, "question_id": "ANVMKmbRp6cq6QTvVbaGiW", "question": "What is this type of pillar called?", "choices": ["new", "decorative", "utilitarian", "classical"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193374, "question_id": "AP3bCAsNJvUpg9xh7EUdP2", "question": "What activity does the pigeon do here?", "choices": ["roost", "feast", "incubate", "mate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193374.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214508, "question_id": "APZSGPPudq6qQtNCCiFLVs", "question": "Why are these giraffes stretching their neck?", "choices": ["to search", "to drink", "to eat", "to play"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75396, "question_id": "AR3wcwESgTTAW7DZnLRvyM", "question": "The red tag signifies what?", "choices": ["ownership", "diet", "vaccination", "danger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318011, "question_id": "AR4zf2SoLK6UM9FABbCcwp", "question": "What is the brush shown used for?", "choices": ["hair", "clothes", "teeth", "toilet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318011.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445044, "question_id": "ASrwFtE6SbVGYQnkEMjRFh", "question": "What are the little pictures on the right of the text screen called?", "choices": ["emojis", "avatars", "minis", "icons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000445044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 143648, "question_id": "ATHvZBhTFQg2eNcCEDTXKY", "question": "What time of day is illustrated here?", "choices": ["8 pm", "7 pm", "midnight", "morning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000143648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304823, "question_id": "ATerWQE9N7mKuE9Zghb5Sr", "question": "What style of cake is being served?", "choices": ["tiered", "sheet", "cupcakes", "loaf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204412, "question_id": "AU5FjrgNgD4onjs8ayVtSD", "question": "Where were the flowers picked from?", "choices": ["bush", "field", "lake", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204412.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223773, "question_id": "AUa5miYNFJn3CogQoWdFz4", "question": "How were those pallets probably transported there?", "choices": ["ship", "train", "car", "bicycle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223773.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544630, "question_id": "AUzbvsgd9PSGcBHNSQkHNR", "question": "Who is a sponsor of this event?", "choices": ["coca-cola", "bnp paribas", "chase", "frito-lay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544630.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537601, "question_id": "AZSF5sru8yaaHeKDSwky3x", "question": "What's the man trying to fly in the air?", "choices": ["drone", "kite", "balloon", "lantern"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120040, "question_id": "AcmgJWmgZsSDq5EqoQuZ7a", "question": "What does this food truck most likely sell?", "choices": ["pumpkin bread", "pizza", "pierogis", "curry chicken"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120040.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 383034, "question_id": "AeEWAzM3MiXMWux3KyLEAc", "question": "Why is the cat looking under the monitor?", "choices": ["is posed", "is hungry", "is curious", "is frightened"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000383034.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310534, "question_id": "AeedJXs9euHL367tWE2YG7", "question": "What does the woman have on her feet?", "choices": ["sneakers", "boots", "dress shoes", "sandals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310534.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97476, "question_id": "AiXuTEkpWN4XEogve8tHcM", "question": "What type of pan was the pizza made in?", "choices": ["stainless steel", "ceramic", "cast iron", "tin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135358, "question_id": "AjAw8icv9NdqkFaTSWTNon", "question": "What famous starship is the pizza cutter fashioned after?", "choices": ["enterprise", "daedalus", "explorer", "excalibur"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400066, "question_id": "Am2xBtc3yUeZF3NtQqd8ur", "question": "What type of electronic device is the man using with the stylus?", "choices": ["pocket pc", "palm pilot", "android", "blackberry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433837, "question_id": "AmsXitZYiaFP2NWj65a34z", "question": "Relative to the photographer which giraffe is sitting?", "choices": ["foreground giraffe", "both", "none", "background giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 361976, "question_id": "ApehUg2dWL3EGrCfx78eTv", "question": "What type of animal is the cow nursing?", "choices": ["calf", "puppy", "kitten", "squirrel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000361976.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122068, "question_id": "Apx8YPzZaSYyHUrBCSCEU2", "question": "Where does the vegetable in the image grow?", "choices": ["overground", "on trees", "underground", "on bushes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122068.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528823, "question_id": "AquMab3NxhX7yukPEHnEjy", "question": "What is being celebrated?", "choices": ["harry's birthday", "new baby", "anniversary", "graduation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470835, "question_id": "ArjtzHDyPUYV8j5yFcMG9k", "question": "What color will the fruit turn?", "choices": ["orange", "yellow", "red", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 46064, "question_id": "AscxZvb3qhbhERYXYn5wz6", "question": "What kind of animal is this?", "choices": ["wild", "reptile", "domestic", "aquatic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000046064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273726, "question_id": "AtjKDNkjEbRqDXSWtwnSE4", "question": "What kind of area is this?", "choices": ["residential", "suburban", "downtown", "business district"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481107, "question_id": "Atr62SyHyyAJRE8KeDeuQK", "question": "The walls are made from what type of wood?", "choices": ["pine", "maple", "oak", "hickory"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481107.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558059, "question_id": "AwKz4znK6SQFgMaFhDESeV", "question": "The table here is meant for what activity?", "choices": ["sleeping", "skateboarding", "picnic", "dancing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296535, "question_id": "AwTGUEHH4Ts6Y3N3V5GETg", "question": "Why is she holding the phone like that?", "choices": ["stole it", "displaying it", "hiding it", "offering for-sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445646, "question_id": "AyCuuJkfQnjadJxHxgiN38", "question": "What body part is extending the most here?", "choices": ["head", "arm", "foot", "thigh"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000445646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180075, "question_id": "AyuM7bxR8joUUaSbMuG59V", "question": "This mans legs have visible what?", "choices": ["blood", "ink", "reflection", "silver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180075.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38182, "question_id": "AzoViXNetbxAsnt6fU2MRM", "question": "What weighs the most here?", "choices": ["bear", "man", "woman", "bowling ball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038182.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19650, "question_id": "B26forVFYjP4pNkGKjgaTt", "question": "What is the man trying to fix?", "choices": ["toilet", "toaster", "sink", "bathtub"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000019650.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436563, "question_id": "B2KtKQoqxucfLoWnmBPASX", "question": "What type of material is often associated with riders of this vehicle?", "choices": ["leather", "cotton", "polyester", "silk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436563.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270551, "question_id": "B3j8YbheHBuKFnXNhXULEf", "question": "The cat is likely elevated by what item?", "choices": ["speaker", "monitor", "table", "tv"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000270551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528871, "question_id": "B4UiDTLAK7opPmyaPoKZVM", "question": "What kind of sign is this?", "choices": ["astrological sign", "peace sign", "street sign", "stop sign"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525043, "question_id": "B4erYEPz5HzkXLBjQ2unoa", "question": "What is this elephant's job?", "choices": ["push", "carry", "jump", "pull"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525043.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444844, "question_id": "B7EVG4xWV7WhEfdSSMvgHy", "question": "What is this appliance used for?", "choices": ["cooking", "washing", "lighting", "cooling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444844.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 143293, "question_id": "BCfpPoqrAH7C8biWVD8qbs", "question": "The association mentioned is probably concerned with which one of these disciplines?", "choices": ["travel", "painting", "piano", "biology"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000143293.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267541, "question_id": "BDoELybgqxLirW5K6iZTmD", "question": "To use the item on the toilet what must you do to it?", "choices": ["light it", "flush it", "wash it", "nothing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000267541.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 174404, "question_id": "BEP5sw5yUxr7AfU87BmJEj", "question": "What would the paddle help the surfer with when catching a wave?", "choices": ["sharks", "speed", "rocks", "balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000174404.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363722, "question_id": "BGTPTxzCt4xz22vy7DJ6wN", "question": "What is the canvass tarp for?", "choices": ["keeps dry", "keep warm", "hides contents", "decoration"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000363722.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227507, "question_id": "BH8EvhJ8DAq6oipAuyUrzU", "question": "Why are two guys wearing blue and two wearing red?", "choices": ["siblings", "skill level", "teammates", "color preference"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000227507.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103514, "question_id": "BJ9nsnWyfqBGWbmRfm2opg", "question": "What makes the image blurry?", "choices": ["dirty sheep", "bad film", "dirty lens", "dirty cameraman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199467, "question_id": "BJqUUu4RSAE6g2CcgiqqGv", "question": "What would one expect to find nearby?", "choices": ["donkey", "bed", "boat", "person"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199467.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304547, "question_id": "BKHdvfPPQDBd3Co3h67F92", "question": "What type of person probably added the sticker?", "choices": ["artist", "musician", "environmentalist", "evangelist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304547.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352489, "question_id": "BKi2zsLd5jgVgPic3EZwf9", "question": "What activity is the non-striped nonhuman animal on the left performing?", "choices": ["sitting", "grazing", "sleeping", "mating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352489.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410427, "question_id": "BKjQKpppC5eWtdQp6tYoae", "question": "What sort of hair is seen hanging here most likely?", "choices": ["cat", "human", "donkey", "zebra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6530, "question_id": "BLNzjkFRqwuNWLww9ULtS2", "question": "What is likely in the baby's mouth?", "choices": ["clothes", "bib", "peas", "bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516256, "question_id": "BS2nhFt2hR7hN6A9jTDGkE", "question": "What is the guy eating?", "choices": ["fruit", "vegetables", "treat", "hamburger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516256.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537557, "question_id": "BT8YNV9f4Cg7BizUehbPFx", "question": "What is the person holding in their hands?", "choices": ["umbrellas", "bats", "racquets", "poles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159284, "question_id": "BT9du5pwM3JJMtxWaiD79K", "question": "What skateboard trick is the boy performing?", "choices": ["grind", "twisty", "kickflip", "ollie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159284.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81863, "question_id": "BUbUyEUjBA6Qo4hCzJAFN4", "question": "Where on this cats body might you find it's name?", "choices": ["ear", "collar", "hat", "paw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74783, "question_id": "BUz4BSSraxxx2kzJjstSpz", "question": "Why is the man wearing a camera on his head?", "choices": ["advertisement", "protection", "documentation", "privacy"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000074783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201383, "question_id": "BWQvLvcf37PynDNNaDqJTp", "question": "What is the animal looking at?", "choices": ["dog", "giraffe", "zipper", "photographer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201383.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466081, "question_id": "BYgg3hAxzrZVUDt28ZSoHF", "question": "What sound might the bird shown here Make?", "choices": ["buzzer", "cuckoo", "none", "trumpet blare"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466081.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495844, "question_id": "BZ2qXzW5snqmRzWbUqFBhY", "question": "What is the tall portion of the building called?", "choices": ["clock tower", "church", "domed", "garage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495844.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157620, "question_id": "Bb52rhbRPquvCANvvdfbNj", "question": "What would this bullet train be mainly used to transport?", "choices": ["people", "supplies", "vehicles", "tanks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157620.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345369, "question_id": "BbsqhTwYDrah7DNFLowLwg", "question": "What type of building is this?", "choices": ["palace", "airport", "hospital", "mall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345369.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473034, "question_id": "Bc4sdiYBdQGFgkvzxQMSZZ", "question": "If a dog attacked these animals what might they use to puncture it and stop it?", "choices": ["horns", "gun", "knife", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473034.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325714, "question_id": "BcH4MHDqgK5osFLLnYtTeo", "question": "What items might one buy in this location?", "choices": ["golf clubs", "phones", "stereos", "tvs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000325714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330658, "question_id": "BccZcm3yTELFKjRSvEZ8Cc", "question": "What type of beverage is being stored in the refrigerator?", "choices": ["wine", "energy drinks", "beer", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330658.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127948, "question_id": "BdjGnziYiwAQa6rgMzB4ih", "question": "What is the bear near?", "choices": ["bench", "chair", "clown", "grass"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000127948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581067, "question_id": "BdpUmtt3chzsQu25NrMkTQ", "question": "What posted limit is clear for persons moving vans when they move into this area?", "choices": ["none", "2000 pounds", "8000 pounds", "9000 pounds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96786, "question_id": "BdzbA9eFnPjpivXx85Su73", "question": "What is the theme of the pictorial the man and women are creating?", "choices": ["comedy", "horror", "historical", "drama"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000096786.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64607, "question_id": "Be7737J4L46SYH8mQJd9n9", "question": "How many cupboards do you see?", "choices": ["one", "none", "four", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000064607.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506494, "question_id": "BfcKmTtMFNMPDBaMzhJSDS", "question": "What is the item in the foreground that is covered up most likely?", "choices": ["pogo stick", "axe", "sign", "bazooka"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506494.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 236235, "question_id": "BhCLXUibYoD66M8cJuQCA3", "question": "What sport is this athlete participating in?", "choices": ["snowboarding", "roller skating", "skateboarding", "skiing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000236235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465157, "question_id": "BhHrdEvWAjvyasxKLszBFV", "question": "What activity is being performed by the bottom animal?", "choices": ["grazing", "swimming", "mating", "urinating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465157.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5656, "question_id": "BhXR6nW55nuYNRVnuycehK", "question": "What are most shower curtains made of?", "choices": ["linen", "wool", "polyester/plastic/vinyl", "cotton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52716, "question_id": "BhpbaHjQvjZ87dDNWe9Fxq", "question": "What thing would move with the person's foot if they moved their right foot?", "choices": ["left shoe", "wood floor", "door", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052716.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441689, "question_id": "BiiPSuDu6UAJdjj4pCpreW", "question": "Which item is used for something other than marking time?", "choices": ["round object", "windowed object", "blue object", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441689.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346348, "question_id": "Bjj6fpgkW4ZwYxxMo23nn3", "question": "Why is the man skateboarding in this setting?", "choices": ["boredom", "commute", "competition", "recreation"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000346348.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 479258, "question_id": "BkK2gnaFBgDAgSWfArLc5f", "question": "What food might be found if these animals look in a corner here?", "choices": ["hay", "oat meal", "mice", "candy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000479258.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156734, "question_id": "BkP2bqX8HT2ToiPYFjDgeC", "question": "What happened to this mans hair?", "choices": ["wet", "dreaded", "died", "lost"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000156734.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84116, "question_id": "BkxgHm6QN89Rd5iDZuV5Fe", "question": "What sport does he have the equipment for?", "choices": ["snowboarding", "tennis", "skiing", "skateboarding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084116.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471509, "question_id": "BmXRnWfyUyoyzwoE5FZVTu", "question": "Standing directly against the grated item here for long periods pressing the grill may cause what?", "choices": ["burns", "frostbite", "wisdom", "creativity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000471509.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209629, "question_id": "BmktMZzjceMSv8WJvTVRQS", "question": "Where is the large plane shown here flying?", "choices": ["down", "no where", "left", "skyward"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000209629.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493788, "question_id": "BnGZWYXavdNSvSrJauNFh5", "question": "Stand alone counters are called what?", "choices": ["valleys", "mountains", "islands", "rivers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493788.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553591, "question_id": "BoULoxQUrf6LwRCNbyeaU5", "question": "What do the long black chords carry?", "choices": ["food", "electricity", "water", "beads"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553591.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200485, "question_id": "BoYPC5PhfBjFoZZg63c2Bx", "question": "In which United States city is this fire hydrant located?", "choices": ["los angeles", "new york", "santa fe", "san francisco"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194825, "question_id": "Bp4R6xpaxfSPaXN48F4VAT", "question": "Where are this young seedlings placed?", "choices": ["farm", "greenhouse", "nursery", "field"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000194825.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395454, "question_id": "BpVRwTuyyRdJiEwfEaxHZe", "question": "What is needed for the item under the plane to trigger upon impact?", "choices": ["batteries", "water", "oil", "fuze"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395454.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38133, "question_id": "BqMv9qbxJGVxBRVPJtoYPp", "question": "What is the person in the process of doing?", "choices": ["backflip", "belly flop", "cartwheel", "wiping out"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038133.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 496697, "question_id": "BsHKBg9axRucUZmyeccsP7", "question": "In which position does the giraffe that is oldest stand?", "choices": ["center", "right most", "behind camera", "leftmost"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000496697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 430419, "question_id": "BsyL58RiTJiXR8ScNs4iZH", "question": "The characters on the frisbee are known as what?", "choices": ["librarians", "nurses", "princesses", "maids"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000430419.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360656, "question_id": "BszfLjdfxYNNTQTb37XjUR", "question": "What is most likely inside of the object that the cat is sitting on top of?", "choices": ["food", "drones", "mud", "flowers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000360656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420991, "question_id": "Btcbhj3KSFi8iDrARFSUad", "question": "The path that this railway follows is known as?", "choices": ["rail track", "road", "path", "line"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190329, "question_id": "BuMFW5LHtW2V2roaxD4PTv", "question": "Whose room is this?", "choices": ["mother", "whole family", "man cave", "babies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495206, "question_id": "BwT7JgamCVxCZDVwPKVGxA", "question": "The symbol in the middle is supposed to represent what?", "choices": ["moon", "comet", "sun", "star"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495206.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407342, "question_id": "BwbfojNxdXifysRD5ksk8G", "question": "Which cyclist is least protected from harm?", "choices": ["green vest", "white t-shirt", "orange jacket", "grey t-shirt"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000407342.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 113208, "question_id": "Bz4bmrttActSUQZAf2guDS", "question": "What type of plants are growing here?", "choices": ["grasses", "bushes", "flowers", "trees"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000113208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373394, "question_id": "C2HKGdRNSguogHtWr4PQ7u", "question": "What is the hydrant looking object on top of?", "choices": ["mud", "manhole cover", "donkey", "curb"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 307778, "question_id": "C2Z2QHeHJmYkeMEqGzqRaA", "question": "What is being tracked by the item hanging in the window?", "choices": ["distance", "money", "age", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000307778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468427, "question_id": "C2xBxgSLF7t6Bvc2axwkKd", "question": "What move is this?", "choices": ["bounce", "whiff", "strike", "tackle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314191, "question_id": "C3SmgwPgYDYYSkUV5bmvxG", "question": "What is on the pizza?", "choices": ["ketchup", "potato chips", "mushroom", "lettuce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314191.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378630, "question_id": "C72BoaiUVE5briB7LQTKEP", "question": "What is required for this activity?", "choices": ["wind", "snow", "water", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378630.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180053, "question_id": "C77yGGfYyzGdCm6VcLJCAL", "question": "What is the kite shaped like?", "choices": ["jet ski", "airplane", "submarine", "yacht"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122580, "question_id": "C7mQTWsgJn8oPjpqbXxF8K", "question": "What is covered by the snow?", "choices": ["dog", "wheelbarrow", "bicycle", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120040, "question_id": "C9nMfyENe27ycaT4Nc834s", "question": "What type of truck is this?", "choices": ["pick up", "moving", "food", "delivery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120040.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466334, "question_id": "CAWegPRLnD5jMVM45zS4gV", "question": "Which fact is true above this animal?", "choices": ["carnivore", "none", "tallest", "reptile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466334.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567209, "question_id": "CAzyD4awgeefkEXKk9uZG3", "question": "What does this storage nook area smell like?", "choices": ["honey", "meat", "powder", "lavender"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87628, "question_id": "CEHvGV9or6biGPh2dtEBha", "question": "This airplane is part of the largest airline holding company in what region?", "choices": ["latin america", "oceana", "europe", "asia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087628.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343133, "question_id": "CEUcHkSmiPPkkzDYA8bSgN", "question": "Predict the future weather?", "choices": ["heat", "fog", "sun", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343133.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65193, "question_id": "CGWwthV6xAZsBGiSHjUV34", "question": "What is the profession of this doll?", "choices": ["cashier", "athlete", "pilot", "driver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065193.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 408118, "question_id": "CGupvk7k5Pc339FUdKntCc", "question": "What is on the wall behind the player?", "choices": ["art display", "advertisements", "instructions", "confessionals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000408118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346866, "question_id": "CHFRU7t3bQV6TZYqAw8QKU", "question": "What type of function is the plane on the runway generally used for?", "choices": ["passenger travel", "medical transport", "military", "mail delivery"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000346866.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494294, "question_id": "CHKipYn3JaYApH2p7QNg5g", "question": "How many slices of pizza are there?", "choices": ["one", "four", "three", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494294.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563724, "question_id": "CJ9NsF77JU6BDw6a4cimYA", "question": "What era are these planes most likely from?", "choices": ["civil war", "wwii", "wwi", "wwiii"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156678, "question_id": "CKcmLC8ZRxQjBsJvAVbq7C", "question": "What type of shot is the woman about to hit?", "choices": ["backhand", "serve", "forehand", "slice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156678.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126752, "question_id": "CP2JZHM2QWqbdxJyjtXFC2", "question": "What language is the sign in besides English?", "choices": ["french", "cantonese", "german", "japanese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000126752.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188378, "question_id": "CRXUotie7SbcA9u69YBz8N", "question": "Euclidean geometry is used to design kite's what?", "choices": ["material", "weight", "shape", "length"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188378.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473162, "question_id": "CUdGb9Qq4kkycF3n3UWXAn", "question": "What sort of building might this bathroom be found in?", "choices": ["bus station", "airbnb", "mall", "gas station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114969, "question_id": "CWhHeJRzkBvWQ562hbZZRP", "question": "What type of flowers is this decorative flower?", "choices": ["shared", "burnt", "cut", "crisp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114969.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419936, "question_id": "CaWr5q8UZdoonp8tWCMbiQ", "question": "Which one of these colors is missing from the flag of all the countries that operate this airline?", "choices": ["red", "blue", "white", "purple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000419936.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355358, "question_id": "CcH8pH47Ceg3AeBppjKivo", "question": "The person here likely did what before going to sleep?", "choices": ["decorating", "fighting", "dancing", "watch tv"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85469, "question_id": "CdQHH5CG9gM3m4kieUpqJG", "question": "Which common utensil is missing here?", "choices": ["hatchet", "saw", "hammer", "knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 219977, "question_id": "CdtNxhNrQpejuTzgQpjcPU", "question": "What is needed to control these objects?", "choices": ["computer", "string", "battery", "remote"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000219977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 278810, "question_id": "CfDX96pTHGkhCP6Njw8JVS", "question": "What has the man on the board just skated out from?", "choices": ["tunnel", "house", "shed", "booth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000278810.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352304, "question_id": "CiVAo8oAgPLcaN4fjaEBv5", "question": "What is the woman doing?", "choices": ["eating", "writing", "singing", "brushing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328630, "question_id": "Cj4qn7UDMmQZiXXPe54VP8", "question": "People leaving this plane should prepare for what type of weather?", "choices": ["summery", "dry", "freezing", "rainy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328630.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478208, "question_id": "CjCZqDBDhNbeNzADEi5v7a", "question": "What is the object called that the baby is sitting in?", "choices": ["sink", "milk can", "washtub", "barrel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41411, "question_id": "CjEUfpaUy7Lkdb8zL7K47j", "question": "Which body part get benefits because of Frisbee?", "choices": ["head", "spine", "leg", "hand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000041411.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251918, "question_id": "CjZSKBRsScBLUPyvyaHve4", "question": "The blue letters are the initials for what higher learning institution?", "choices": ["boston university", "boston college", "fordham university", "vassar college"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251918.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504846, "question_id": "CkATS2qCcegBkVtZ6aFXHq", "question": "The person enjoying this treat is prepared for what today?", "choices": ["diet", "wind", "nothing", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504846.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438120, "question_id": "CkE4CHXxKr6qfBD3394D7i", "question": "Which of these food groups is missing from the plate?", "choices": ["fruit", "vegetables", "grains", "protein"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478054, "question_id": "CnEAUCijNGmm6hMGLjqb5b", "question": "What type of school would most likely keep these items in stock?", "choices": ["high school", "preschool", "middle school", "university"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506345, "question_id": "CnQzpMDSppHnfXtz4FSiSP", "question": "This area allows one to clean themselves in only which manner?", "choices": ["spray down", "bath", "shower", "hose off"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506345.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485104, "question_id": "Co4DKh8CuxtpcdFzNYZjxo", "question": "What is next to the man?", "choices": ["monkey", "beer bottle", "carpet", "fire hydrant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271146, "question_id": "Cos8m2LwGMK8bCw6dp4csc", "question": "What is the device used for?", "choices": ["powering", "cooking", "calling", "cooling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271146.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39440, "question_id": "CqcX9TtHGxguSQ9Sqhf3Cj", "question": "Why is the man wearing a vest?", "choices": ["city worker", "fashion", "skier", "weather"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000039440.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355634, "question_id": "CsVYkHoVdPxa6FTrXPMXwq", "question": "In which country is this pedestrian crossing located?", "choices": ["vietnam", "taiwan", "china", "japan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355634.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441820, "question_id": "CtaSSfUT6MgRyJeudXWExy", "question": "What type of sink is this?", "choices": ["commercial", "laundry", "bathroom", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441820.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 149264, "question_id": "CvfPC9xK8urDYM9yaDPeyH", "question": "What meal is coming up next?", "choices": ["midnight snack", "breakfast", "dinner", "lunch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000149264.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 236305, "question_id": "CwDG6RSWXN5mr848SCJRHL", "question": "What feature does this animal have?", "choices": ["fins", "paws", "wings", "talons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000236305.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399493, "question_id": "Cxgr7jZAapYsR3R2TBgvNZ", "question": "What is coming off of the item the woman is holding?", "choices": ["grass", "water", "fire", "bugs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399493.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566933, "question_id": "CyDWg5E54iUGRxJkXjtHdb", "question": "What is the red sign threatening a fine for?", "choices": ["speeding", "turning", "honking", "parking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 113339, "question_id": "D25fpykWTt5ZrvHFNmU6GP", "question": "What type parking is allowed near the yellow lines here?", "choices": ["emergency", "diagonal only", "parallel", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000113339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 128933, "question_id": "D28BhLeoLw4tdmnryBwodG", "question": "What motorcycle manufacturer is on the mousepad?", "choices": ["harley davidson", "honda", "ducati", "yamaha"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000128933.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420095, "question_id": "D2abQ3R4ChcFDqQ9p5x3VM", "question": "The bird is likely looking at what?", "choices": ["reflection", "dog", "seed", "donkey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420095.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371727, "question_id": "D52W9z7bch8aDZn7UmdgvS", "question": "What is the doll reading?", "choices": ["map", "magazine", "kindle", "book"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371727.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473694, "question_id": "D6QrjhNjLY35snwCDh4iSk", "question": "On what type of furniture is this cat resting?", "choices": ["sofa", "chair", "table", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473694.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448873, "question_id": "D6eqnhksYhghiLBdVcyTWq", "question": "Besides the wheels what parts make a spinning motion?", "choices": ["propellers", "nose", "tail", "wings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448873.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379459, "question_id": "D7un8Rc7vNj2W9Cc4VNXcU", "question": "What geometric shape can be seen?", "choices": ["triangle", "rectangle", "circle", "square"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318894, "question_id": "D9o6Vqkb7bMwEZuHTuJ8cG", "question": "This room is receiving plenty of fresh what?", "choices": ["meat", "fish", "water", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136201, "question_id": "DABjtPLiqGNbjF5nFAPuy3", "question": "How many flights does this plane make in an average month?", "choices": ["none", "three", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136201.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404155, "question_id": "DAPmq6GHBY7DfyvRSSNVrP", "question": "What are modern street lamps powered by?", "choices": ["fire", "solar", "gas", "electric"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404155.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271644, "question_id": "DCeLQr7Zpsv4BQ55DXfxf8", "question": "What can be used to describe the giraffe's mouth?", "choices": ["agape", "deformed", "closed", "diseased"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271644.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 380213, "question_id": "DEgFDLqkMhwoMpKdxscFiu", "question": "What type of kitchen is this?", "choices": ["hospital", "residential", "commercial", "food truck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000380213.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547708, "question_id": "DFAfHAkySd5Y7yEpPUwYDb", "question": "What type bus is shown here?", "choices": ["shuttle", "train", "party", "city"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96520, "question_id": "DK45C3HC8kTJhjsLzA2D3e", "question": "The middle one wants to avoid doing what with evil?", "choices": ["do", "hear", "see", "speak"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 241680, "question_id": "DKjA3tqfwNvXaxgm2yN82T", "question": "It would be difficult to practice this sport in which one of these countries?", "choices": ["finland", "canada", "jamaica", "switzerland"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000241680.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112283, "question_id": "DLgndsqcC9pc7kfDF3ixXJ", "question": "What would cause the rubber Frisbee to be stuck to the dog's upper teeth?", "choices": ["holes", "glue", "gravity", "tape"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488525, "question_id": "DMUr6W6CcRfpjcivp9p9Fg", "question": "What is the controller controlling?", "choices": ["television", "pins", "speakers", "ball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488525.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326444, "question_id": "DPFcKQqKSM8LNLYZ5ZiEaK", "question": "Where are the men playing at?", "choices": ["football field", "skate park", "amusement park", "basketball court"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326444.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346822, "question_id": "DPbfChjwSBkTL9UgvyYjcd", "question": "What writer created the character upon which the bear on the left is based?", "choices": ["rowlings", "poe", "milne", "potter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346822.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 39218, "question_id": "DPvvEFF9A3SEgQbJ7RU2Vg", "question": "The woman is looking in what direction?", "choices": ["down", "below", "up", "behind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000039218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555395, "question_id": "DQF6GBGEWg3bTNrnDFpgWy", "question": "What is the genus for this animal?", "choices": ["canis", "equus", "aves", "bovidae"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555395.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169061, "question_id": "DQThv5iCQggRHSSR4RZnba", "question": "Where are these animals?", "choices": ["ocean", "field", "sky", "city street"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169061.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120213, "question_id": "DQyxHvqqJPeXWjr27Njizo", "question": "How many random stops does this bus make today to take on new unscheduled passengers?", "choices": ["about 5", "none", "seven", "20"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120213.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350495, "question_id": "DRFZ2vdU4USaZm8rd5SmvS", "question": "What is the time displayed by the phone?", "choices": ["920 pm", "902 pm", "209 pm", "209 am"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350495.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26080, "question_id": "DRX6zgWbXXhRdwm7rSbWGh", "question": "Why does the man have something in his hands?", "choices": ["defense", "balance", "weapon", "signal"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000026080.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87237, "question_id": "DUP5do6wiKFCSVYLwEtTnH", "question": "Which type of food is missing on this plate?", "choices": ["vegetables", "starch", "meat", "grains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127315, "question_id": "DV7dh7AsV8EAnJQTdoVpY5", "question": "What is the cat going to do with the bird in its mouth?", "choices": ["eat it", "reanimate it", "hug it", "save it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573244, "question_id": "DXxVciujXmZxMAUdbJwE3j", "question": "Who was a member of the WWE wrestling team whose name appears at the top of the man's jacket?", "choices": ["triple h", "chris jericho", "mankind", "dynamite kid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000573244.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73886, "question_id": "DZkntzXECzpcBuNhk4zGuF", "question": "The round red items are sourced from which plant?", "choices": ["pepper", "tomato", "none", "egg plant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073886.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404925, "question_id": "Da2MV2TYTtxXbQQeqQnWGG", "question": "Where are the trains parked?", "choices": ["alone", "train station", "tunnel", "inside"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333757, "question_id": "Da6RJ8wMQQctfCQRA3aEr2", "question": "What type of buildings are these?", "choices": ["skyscraper", "barn", "rancher", "colonial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226678, "question_id": "DbRSL4XkwiX9UFvp8vqQk3", "question": "What is the giraffe looking at?", "choices": ["watering hole", "photographer", "tree", "bushes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226678.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431263, "question_id": "DfmqVnZjv98AxeHera5DsQ", "question": "What entity placed the umbrella over the dog?", "choices": ["photographer", "humane society", "dog", "dog's mother"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330587, "question_id": "Dfsc3q7gWkC9Yg8Qq8ASSb", "question": "What are cars unable to do in this area?", "choices": ["exist", "keep driving", "turn left", "park"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330587.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68991, "question_id": "Dg3CVdA5bnSYsCp7pUcaMd", "question": "What color is the most surprising item shown here?", "choices": ["yellow", "brown", "gray", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068991.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118303, "question_id": "DhNuu3BX3iA2PGGi2y4CQ3", "question": "What item is usually used with this kind of food?", "choices": ["sugar cubes", "salsa", "relish", "pizza cutter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167124, "question_id": "Dhbc3eyVFaNBi5BqXDDccv", "question": "What has the dog caught in the field?", "choices": ["cat", "rabbit", "mouse", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000167124.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517358, "question_id": "DiB69QhKLvkBjRgdFCADH7", "question": "What is the person using the remote to operate?", "choices": ["fan", "television", "garage door", "air conditioner"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114243, "question_id": "DjKjhKoECghNXxtFF3BwdV", "question": "What energy source powers this item?", "choices": ["gas", "battery", "coal", "solar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 530550, "question_id": "Dk7sXR8xhHC2MznjhsHKao", "question": "Which athletic competition could he participate in?", "choices": ["paralympics", "olympics", "special olympics", "junior olympics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000530550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120941, "question_id": "DkMPGkA5fFoKrxTpdezDPC", "question": "What is the purpose of the luggage cases for belongings below the man?", "choices": ["drying", "watering", "heat", "protection"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000120941.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515425, "question_id": "Dm9pMWxsJ3VNUzqN4pExym", "question": "What is keeping this vehicle from going off course?", "choices": ["tracks", "cones", "traffic cops", "caltrops"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357133, "question_id": "DmeWWVmvb8twjZFA4ZWM9s", "question": "What are the two protruding structures called on the top of the giraffe's head called?", "choices": ["pipes", "ossicones", "hornettes", "antlers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357133.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107274, "question_id": "Do9rDMUSzXkrAyqToMyKkx", "question": "How many zebras are in the image?", "choices": ["one", "three", "eight", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107274.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373449, "question_id": "DoSGxG9mBRXi6pwhgNhNzT", "question": "What kind of things are on the wall?", "choices": ["grafitti", "flyers", "paintings", "laws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373449.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550571, "question_id": "DqS5GKUFycKtb9gkYHYJfH", "question": "What is the name given to this type of bus?", "choices": ["suv", "sedan", "double deck", "arv"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358638, "question_id": "Ds2TSFNe9JtnS4XHf8QiMx", "question": "What animals are shown out the window?", "choices": ["tiger", "elephant", "hyena", "lion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66740, "question_id": "Ds8yc9oZqQEF5wNtCeQKiy", "question": "Why is the giraffe near the hay in the cage?", "choices": ["to fight", "to sleep", "to eat", "to play"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066740.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153066, "question_id": "Dues3FGEor9sq6QiTVqPLg", "question": "What is the dog's nose closest to?", "choices": ["ball", "cat's tail", "picture frame", "backpack"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000153066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229663, "question_id": "DwYQfKsSsMhphaLQVWmhMy", "question": "The plane is in what type of building?", "choices": ["terminal", "fuel depot", "control tower", "hangar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 383880, "question_id": "DyjyhWxeicCDh3DkFHcYQB", "question": "What is this person playing?", "choices": ["audiobook", "video game", "music video", "movie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000383880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481608, "question_id": "DzT8oTdTwSwQJ2qa9uBRAo", "question": "What is he standing in?", "choices": ["dirt", "water", "sand", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37426, "question_id": "DzcezsA3bQH4uLyYCLQkDi", "question": "In which building would you be most likely to find windows made of the same material as this humming bird?", "choices": ["sanctuary", "school", "church", "dog house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037426.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 111540, "question_id": "DzfpwCUXikqpGLiY66iys7", "question": "How many different types of clouds are in this part of the sky?", "choices": ["ten", "one", "two", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000111540.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562132, "question_id": "E46GN7pYw8YaWSRtGYa99z", "question": "Why is the ceiling above the desk slanted?", "choices": ["under stairs", "room decor", "construction error", "water drainage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562132.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349674, "question_id": "E5umqMDEtEU27g3xbKJH4U", "question": "Where is the skateboard?", "choices": ["garage", "railing", "shelf", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349674.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337729, "question_id": "E7fq9EVmKH5RdmCdM3kzk7", "question": "What type of shorts is the surfer wearing?", "choices": ["cargo", "denim", "board", "polyester"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337729.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483004, "question_id": "E9Tzcysn4nWkpMX9KrYJtT", "question": "What has the man seemingly jumped higher than?", "choices": ["villa", "plane", "park", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483004.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545746, "question_id": "EBiwCVP3LQFKB3nAooHVzY", "question": "What's causing the blurred part on this bird?", "choices": ["flapping", "stillness", "molting", "fledging"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000545746.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427684, "question_id": "EELxmaQafh8PYDVnsX2rJK", "question": "What former President has a last name that matches the name of the hall?", "choices": ["james monroe", "andrew jackson", "george washington", "abraham lincoln"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427684.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170938, "question_id": "EFA2rsrqm8LoTkTGeRFWk8", "question": "Which team does the batter play for?", "choices": ["phillies", "padres", "red sox", "pirates"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170938.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64959, "question_id": "EFTLjyYA4nBLAdNUFHftHN", "question": "In what type of location is this kitchen?", "choices": ["residence", "hospital", "restaurant", "school"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000064959.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35238, "question_id": "EFXb3tgMmZkPKb9HiuCSDo", "question": "What is he doing?", "choices": ["drinking coffee", "hiding cup", "cooling coffee", "drinking water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035238.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324998, "question_id": "EH7VoqkCEPLs2eDe9myah6", "question": "What are persons standing trying to select?", "choices": ["cocoa style", "lift options", "ski route", "mcdonald's meal"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000324998.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474235, "question_id": "EHEsLg96UP94pcJanr9uBS", "question": "Dark condition is due to the absence of what?", "choices": ["protons", "electrons", "photons", "neutrons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257904, "question_id": "EHXP9muCkYt7422ethuu3C", "question": "What condiments does this person like?", "choices": ["mayo", "horseradish", "ketchup mustard", "chile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420175, "question_id": "EHzoddxyLGQCGSY49KKpfS", "question": "What color background would make the man's shirt invisible?", "choices": ["white", "orange", "black", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420175.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350269, "question_id": "EKanSnfsKq9vpgQbhsjUsc", "question": "What can be said about the giraffe's front legs?", "choices": ["gesturing", "injured", "bent", "spread apart"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000350269.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79521, "question_id": "ELZMHP2XAozEsWXr24nU6B", "question": "What is a similar agency to the agency that owns the bus?", "choices": ["goya", "scotland yard", "amazon", "domino's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540399, "question_id": "EQ3wyq65hqPunQFTTDz9w8", "question": "What does the event's sponsor make?", "choices": ["phones", "cars", "guns", "computers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540399.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410469, "question_id": "ERbUvk254NmwtBGNLKGmHd", "question": "Which of these creatures with feathers shown here is likely to eat the other?", "choices": ["egret", "eagle", "pelican", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 261533, "question_id": "ESaAHz4VptNLwAdkHCuC94", "question": "What is she ready to do?", "choices": ["serve", "dribble", "dunk", "sprint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000261533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218753, "question_id": "ESxaVX2eem8omiCur6EDPv", "question": "Where are the ear of giraffe above facing to?", "choices": ["down", "up", "front", "back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339520, "question_id": "EVi4zxZmXuzKvuVG49ecBx", "question": "Pedestrians can walk in this intersection when the light is what?", "choices": ["yellow", "broken", "red", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339520.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109547, "question_id": "EXwDTUgVAsDbnQtgeoZCBH", "question": "What orange vegetable is shown?", "choices": ["squash", "pumpkin", "yam", "carrot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109547.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138474, "question_id": "EYaNroLRtqYVbeeMXXcjVf", "question": "How is this food portioned to be served?", "choices": ["cubed", "sliced", "diced", "shredded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485864, "question_id": "EYicdJAabNWkwTpGYXZHyY", "question": "What kind of object would have to stop here?", "choices": ["car", "plane", "boat", "train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540289, "question_id": "EYz7rC4hXsjogejhhPavtV", "question": "Which year is written on the teddy bear's left foot?", "choices": ["2021", "2020", "2014", "2004"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540289.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206029, "question_id": "EZ9ggZ88ctL6EsyVYoWTe5", "question": "What does the event's sponsor manufacture?", "choices": ["clothing", "computers", "pet food", "cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206029.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267364, "question_id": "EaA5nZ8KFrJSuVLs5tzVdE", "question": "What kind of consumable is available at the train station?", "choices": ["smoothies", "burgers", "fries", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545881, "question_id": "EaVj6QmGqmhTt4Q83bBWLX", "question": "Who took this man's photo here?", "choices": ["no one", "spouse", "himself", "his mom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92908, "question_id": "EbSkHnaQKjzKHJGmsi49HC", "question": "Why is he bent over?", "choices": ["slipping", "balance", "hiding", "falling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 332842, "question_id": "EbTNPNRQ5NadvpKsxAFBkL", "question": "Why is the cat laying on the laptop?", "choices": ["is hiding", "is posed", "is bored", "is tired"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000332842.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273072, "question_id": "EcsVj9xpAVg3zSGZb8BrhL", "question": "What beverage will this person drink when waking?", "choices": ["toddy", "milkshake", "coffee", "beer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144490, "question_id": "Ee8FJu4FfaSB6Xj6BGfNhG", "question": "What is he preparing to do?", "choices": ["brush teeth", "shower", "laundry", "wash hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000144490.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500707, "question_id": "EeZ2AZoeghpP5Ja5Cp2XW4", "question": "What is the job of the person shown?", "choices": ["trainer", "pilot", "janitor", "driver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36079, "question_id": "EesgVhfm75p3UBDzzSkJg2", "question": "The train tracks seen here carries whom?", "choices": ["tourists", "oil", "giraffes", "commuters"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000036079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 244783, "question_id": "Egr7kbnLqWvoe5JaPk7vX5", "question": "What specific kind of food is flanking the hot dog?", "choices": ["bread", "baguette", "cheese", "bun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000244783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66809, "question_id": "EhH3wdUXZuvwv85SoDZePg", "question": "What is the man in blue in?", "choices": ["water", "cannon", "house", "apartment"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354914, "question_id": "EheGnbZJJfbhSAqkjcxyVT", "question": "What is the sum of the three numbers that are on the train?", "choices": ["70", "16", "25", "nine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417821, "question_id": "Eike3yj9EZpxdfu9rBuGVS", "question": "What is the boy doing?", "choices": ["playing", "schoolwork", "posing", "dancing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417821.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 426251, "question_id": "Ek98XW5uEuCAd7sTKuucaM", "question": "Which war was fought before this lamp post was built?", "choices": ["civil", "wwi", "vietnam", "wwii"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000426251.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447640, "question_id": "EkCyCkh8B7CSYYc6ZVXdn8", "question": "Why can you see a red building in this image?", "choices": ["painting", "sun glare", "camera", "reflection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55794, "question_id": "EkoEPLBm3d2Lyz9tcsMsMV", "question": "What should go on the green dish?", "choices": ["butter", "ketchup", "dressing", "barbecue sauce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10279, "question_id": "EnCDcmm6bzjGZbw3Z3c9ur", "question": "Which current world leader is this stuffed bear negatively associated with?", "choices": ["joe biden", "justin trudeau", "xi jinping", "vladimir putin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010279.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341095, "question_id": "EnCgn4fXcu46qYkK6XqemF", "question": "What is typically the shape of the object the man is playing with?", "choices": ["square", "trapezoid", "circle", "triangle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000341095.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544385, "question_id": "EnVSWeufzPfedjfxvGMD76", "question": "What is this person ready to do?", "choices": ["descend", "ascend", "hide", "roll"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544385.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 287346, "question_id": "Enc6j5HMuCnhSNu6cxT77u", "question": "What liquid can be seen in the top of a wooden cabinet?", "choices": ["beer", "soda", "wine", "milk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000287346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338272, "question_id": "EqUuzU4qaGFaWKm6fWWQyS", "question": "The cabernet hails from what region?", "choices": ["paso robles", "tenerife", "napa valley", "sonoma county"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000338272.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64368, "question_id": "EqZMaQqu2M5BzCptX96RXK", "question": "What does the statue have on its head?", "choices": ["motorcycle helmet", "top hat", "earphones", "antlers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000064368.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286631, "question_id": "Eqckw9YRu7iYHCabtz3wUp", "question": "What was placed on the cow to keep track of it?", "choices": ["tag", "bell", "sign", "horn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498429, "question_id": "EqvjsCXsuYecaNytspLSyQ", "question": "The item seen here is usually made of what material?", "choices": ["skin", "plastic", "cloth", "marble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90153, "question_id": "ErgoMVwS2q4rR93dH3QaBm", "question": "What kind of clothing can be made from this animal's skin?", "choices": ["rayon", "cotton", "leather", "polyester"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090153.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226288, "question_id": "ErinB6btAvwh5izqJ7RGPw", "question": "What is forbidden when traveling by car beyond the red and white sign?", "choices": ["parking", "exiting", "turning", "entering"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000226288.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285887, "question_id": "Et35h5fTSoKyLujt3kFo5j", "question": "What is the color of the vessel?", "choices": ["brown", "black", "white", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285887.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404835, "question_id": "EukDdbzJJ3qbxYA8RDNq9W", "question": "What military is the aircraft associated with?", "choices": ["dare devils", "flying tigers", "4105th army", "zoo patrol"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461658, "question_id": "EvHvSzqtPbNBMxCiam4fua", "question": "Why is most space in the truck in the bed?", "choices": ["more balanced", "more cargo", "more passengers", "better mileage"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000461658.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360583, "question_id": "EvaLirCm8rywCsegDZVJoP", "question": "Wii remote is used to play which game?", "choices": ["card", "audio", "video", "race"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000360583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204756, "question_id": "Ewba9ezmJYAHKwP9hhuTqt", "question": "The person in the bed lives in what type of city?", "choices": ["urban", "ghost town", "rural", "suburban"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204756.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441688, "question_id": "Ex7pu5drbAMFEd5upxbCqL", "question": "This company can help one with which one of these?", "choices": ["selling car", "changing addresses", "getting jobs", "makeover"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441688.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154482, "question_id": "EyThMqchawTnMyrosFwGMb", "question": "The bathroom sink is located in what type of venue most probably?", "choices": ["hotel", "school", "home", "stadium"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390648, "question_id": "EyfkyqP8ueA8otad5yDRZg", "question": "Which country is this railway located in?", "choices": ["germany", "united kingdom", "united states", "netherlands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501186, "question_id": "EzbQajC5LP3xcFWnuW8Wfk", "question": "What type of room is this most likely showing?", "choices": ["park", "restaurant", "hospital", "hotel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000501186.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 370057, "question_id": "F2QsbdMCgtsVZ8oZcgjMnx", "question": "The tall tower served as a warning for what type of vehicle?", "choices": ["truck", "ship", "airplane", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000370057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85833, "question_id": "F2uHjcTbNyfjJ8bWQYKPKK", "question": "Which one of these animals could be hiding in this terrain?", "choices": ["crab", "lion", "shark", "snake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085833.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355271, "question_id": "F3iinmnePSsJwMJfQX48oL", "question": "What are these people doing?", "choices": ["hopping", "racing", "eating", "wrestling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355271.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152710, "question_id": "F6qkcwrwvyCfx5DQvUQtcA", "question": "What is this zebra likely doing here?", "choices": ["eating", "drinking", "hunting", "walking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152710.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 237829, "question_id": "F9FbZn2akxLZWFUh8YNURT", "question": "The sign with the yellow triangle is there to prevent what?", "choices": ["injuries", "crashes", "theft", "arrests"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000237829.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184264, "question_id": "F9HNVggTaAfKgDpgT2nHuN", "question": "What activity are the humans engaged in?", "choices": ["fighting", "entering", "chatting", "emerging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184264.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481089, "question_id": "FAdUCRaUFp8N5FgEZkb2jx", "question": "What are the brown walls of the pier made out of?", "choices": ["steel", "stone", "wood", "clay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481089.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416154, "question_id": "FAwa4MCSCemwSCsayek2A8", "question": "What function does the wetsuit provide the user here?", "choices": ["insulation", "stress relief", "fashion", "cooling"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000416154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442013, "question_id": "FELjJYuj9xXkFehivCKinQ", "question": "What sort of power supplies the topmost oven?", "choices": ["gas", "electric", "propane", "solar rays"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263894, "question_id": "FEuuZxY8oBxdvTypTYFs63", "question": "What is this dog ready to do?", "choices": ["eat", "sleep", "swim", "hide"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265765, "question_id": "FEvvvN6BrEBRXCpociThFf", "question": "What type of transportation is shown?", "choices": ["land", "air", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235533, "question_id": "FFiXi7ECqJtFtbnbxvmxBU", "question": "Which animal has similar color feathers as the color of the blanket?", "choices": ["peacock", "parrot", "vulture", "pigeon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107509, "question_id": "FFvQ95XKGqfzSHkfdAAfUY", "question": "What is on the shelf of the hearth?", "choices": ["glass", "frame", "pillows", "baby"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107509.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276687, "question_id": "FJLQoETozcoM6AhKANJ2rQ", "question": "What kind of activity is being performed by the zebra in the center?", "choices": ["grazing", "defecating", "breeding", "attacking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276687.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552469, "question_id": "FKbwSx7yvT6VGKdLVDsnCp", "question": "What is keeping the animals in one specific area?", "choices": ["traffic", "stone walls", "predators", "fencing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552469.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 526651, "question_id": "FLiRAqVYdJ7PrNTGUyGV8P", "question": "What is elevating the object in the air?", "choices": ["string", "nothing", "stand", "man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000526651.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175276, "question_id": "FMJLVDis9jJHdhBoksfSVQ", "question": "What type of activity is being performed?", "choices": ["rescue", "work", "leisure", "artistic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175276.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 48512, "question_id": "FNcwVcAqZHb7VmN2eKu7gQ", "question": "If someone wanted an Ice cube what part of refrigerator should be opened first?", "choices": ["none", "bottom", "side", "top"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000048512.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215533, "question_id": "FNxW5Gvu2mvceH9D3zctkM", "question": "What is the man cutting with the scissors?", "choices": ["stuffed animal", "paper", "candy", "cardboard box"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000215533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79521, "question_id": "FPCgk452HhG58HhMTRs528", "question": "In which Asian country is this police bus located?", "choices": ["vietnam", "china", "laos", "thailand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108590, "question_id": "FRWi4UZLVm87DNpQD6kN7L", "question": "What kind of writing is shown on the table?", "choices": ["cursive", "d'nealian", "hieroglyphics", "print"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162258, "question_id": "FRjz7gWYM8ArKbGVV2xq4g", "question": "Where did the elephant come from?", "choices": ["water", "grass", "desert", "forest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162258.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251488, "question_id": "FRvogQqgCahaN92J22Akai", "question": "What is near the items on the water?", "choices": ["windmill", "shark", "lighthouse", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251488.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38617, "question_id": "FTyUGBRyt5vSLQFDSdrGX8", "question": "In which style will this person next hit a ball?", "choices": ["forehand", "two handed", "backhand", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038617.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231968, "question_id": "FUparsxx9jp3MFpByKQD58", "question": "The old man has dedicated his life to living in the wild with what?", "choices": ["wolves", "hippos", "lions", "bears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98235, "question_id": "FVy2BHoXniudphJGPt6eDn", "question": "What type of transportation is shown?", "choices": ["air", "water", "land", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162513, "question_id": "FYtuPfwk6FrzCS8uDKbaB5", "question": "What is on top of the elephant's head?", "choices": ["leaves", "snow", "sand", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162513.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533996, "question_id": "FboajbJRcE3UW3JhTjT288", "question": "This animals owner has noted it by what symbol?", "choices": ["three", "cross", "w brand", "bessie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186341, "question_id": "Fc32GmwfrJSbwZnN2STNi2", "question": "What is the man's line of work?", "choices": ["cop", "teacher", "cook", "fireman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186341.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 380402, "question_id": "FdCnPBA8piSsQZb2egYTYW", "question": "What is the cat watching?", "choices": ["cars", "clouds", "television", "birds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000380402.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535346, "question_id": "FdVXrX7eRemhmymiZfeB7Z", "question": "What appears at the base under the clock?", "choices": ["window", "door", "cat", "face"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306649, "question_id": "FdmPMPfYX995yfbqGY4MHr", "question": "Where is this cat located?", "choices": ["yard", "wild", "kitchen", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231304, "question_id": "FecH7WudfyZkLqTxhW6KMA", "question": "What activity is this person engaging in?", "choices": ["school work", "self pleasure", "competition sports", "casual sports"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420985, "question_id": "Ff2rShs3wHRkwCNuLEhZui", "question": "Where is the baby that the teddy belongs to?", "choices": ["home", "prison", "hospital", "inside belly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420985.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2945, "question_id": "FfqySuKZ6dtBgMSHzocbTY", "question": "What is the dog trying to dog while on the ground?", "choices": ["escape", "roll", "eat", "jump"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002945.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403022, "question_id": "FfznMgmLVv6pSsvoUBE5xW", "question": "What is this cooler meant to store?", "choices": ["vegetables", "ice cream", "beer", "meat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403022.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79529, "question_id": "FjtgPDMTkKfAiLdA6UcFJt", "question": "Why are the boards stood on end?", "choices": ["broken", "stolen", "keep clean", "for sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079529.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404219, "question_id": "FjvoTChJEdRWwM8Yhc4DfH", "question": "What is the proper thing to do to the containers when finished?", "choices": ["eat", "save", "discard", "recycle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404219.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401304, "question_id": "Fn6xo7c5EEaqTgGMvwLzQw", "question": "Kwality walls frozen dessert is made up of which fat?", "choices": ["none", "milk", "vegetable", "animal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294184, "question_id": "Fp42nuwGjwKjopuC3hYdr2", "question": "What does the dog have in it's mouth?", "choices": ["bottle", "tennis ball", "food", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189754, "question_id": "Fqo7Zth75cg9bh9iGiyeQF", "question": "What animals are standing between the giraffes?", "choices": ["bear", "pig", "cow", "zebra"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189754.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164742, "question_id": "FrYpPZAHmqzCWdFBuNY4go", "question": "Does the man in tan want the ball to go in the net?", "choices": ["maybe", "no", "unsure", "yes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460957, "question_id": "FsW6UuTYJgFiMMNhkwKe52", "question": "What can one obtain from the structures by the light post?", "choices": ["phone call", "newspapers", "lotto tickets", "cigarettes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460957.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144989, "question_id": "FsWZoyRFuSUxBA3Nyq8CAs", "question": "What does the person who made this sign exhort you to do?", "choices": ["leave", "quit", "act", "turn back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000144989.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275756, "question_id": "FtVJRyRWcoXBiDYxeiF5yf", "question": "What is he holding in his hand?", "choices": ["pen", "banana", "hat", "keys"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275756.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 60195, "question_id": "FtjYs6sDo9wzubfdS9RD82", "question": "The hands of the clock resemble what?", "choices": ["bees", "forks", "swords", "cats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000060195.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286781, "question_id": "FtyJYmCZRp3fcPUePSZpTK", "question": "What type young might this species bear?", "choices": ["cub", "children", "calfs", "small fry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21114, "question_id": "FtzMb7gB7gXn6j5RbrybuX", "question": "What type of camera was used to take a picture of the pizza?", "choices": ["polaroid", "digital camera", "camcorder", "slr"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021114.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532138, "question_id": "Fu9FjiMrhc42piyhLJBDUX", "question": "How would he close the top of his shirt?", "choices": ["buckles", "buttons", "laces", "zipper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47809, "question_id": "FvJYcAz3DBdm72QHerWQe3", "question": "What is most likely the season?", "choices": ["summer", "fall", "spring", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000047809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473120, "question_id": "FvPavQJr3LFUyXo6p6yw3Q", "question": "What is a common topping for this food item?", "choices": ["ice", "meatball", "cream cheese", "cherry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504627, "question_id": "FwCpxpWyswpv7vidKVX3fF", "question": "In what part of the United States is this train traveling?", "choices": ["mid-atlantic", "northeast", "west", "south"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 210911, "question_id": "FwazKLrgVXJkZdUvGsCVa2", "question": "Where does the man seem to be laying?", "choices": ["in office", "in bed", "in library", "outdoors"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000210911.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 14350, "question_id": "FxoPdYAE7jctBdwN9x7Dey", "question": "Where are the trains making stops at?", "choices": ["station", "bank", "park", "library"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000014350.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131848, "question_id": "FymnURKtrepKSSiYvhfVnM", "question": "What type of water sport are the two men trying?", "choices": ["windsurfing", "kitesurfing", "bodysurfing", "wicksurfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000131848.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156775, "question_id": "Fzf7P4x6xxTiXuRuRCwach", "question": "What is the dog being leashed to?", "choices": ["sign post", "bench", "cooler", "hydrant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156775.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227837, "question_id": "G2quzqeqpXuW9ZVRZGy6he", "question": "What species resides inside this living space?", "choices": ["rodent", "porcine", "feline", "ursine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000227837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328767, "question_id": "G2y8ghoUoibibzbrVR4esu", "question": "What president was this type of toy named after?", "choices": ["adams", "roosevelt", "harding", "lincoln"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553999, "question_id": "G4Lcko98LQF8XYvRUzz4kj", "question": "What are these appliances used for?", "choices": ["call", "control", "cool", "cook"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000553999.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314550, "question_id": "G4yK8gErSvVMuLMX65NFxt", "question": "What is a term for the man's attire?", "choices": ["bikini", "wetsuit", "shirtless", "diving suit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503817, "question_id": "GAbBX2VPQWp2aRq39V5vQD", "question": "What activity is the man performing here?", "choices": ["bicycling", "pushups", "skating", "surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503817.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319753, "question_id": "GAkAJNg7TrDk4GVTkwUPuD", "question": "Which seat is the most comfortable?", "choices": ["floor", "chair", "couch", "table"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126330, "question_id": "GDpnjgCD8NMBDrvDeb8jA8", "question": "What is this stick being used for?", "choices": ["flying", "rudder", "diving", "fishing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000126330.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503062, "question_id": "GDyZ7T5Fra8zzm8c8DDGeP", "question": "The right half of the pizza would be eaten at which part of the meal?", "choices": ["dessert", "appetizer", "main course", "hors-d'oeuvres"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71974, "question_id": "GFSdXPX6uAHaWaMCcrwPBp", "question": "What color is common between all three bears?", "choices": ["white", "yellow", "tan", "dark brown"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071974.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53303, "question_id": "GGZoJeHZpjGwdmrprfKkZh", "question": "What country is this?", "choices": ["israel", "qatar", "china", "japan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541070, "question_id": "GKuuUpAbHri3VuqhDQ6Qci", "question": "How would you close his sweater?", "choices": ["buttons", "laces", "velcro", "zipper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212872, "question_id": "GLAVz3HZkRKws3fLRUTUS8", "question": "These type of grounds are referred to as what?", "choices": ["manicured", "wild", "unkempt", "barren"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000212872.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26700, "question_id": "GLBpoWbdMAAMwmniBmsEdf", "question": "The small door to the left of the refrigerator and above the grill is used for what?", "choices": ["hvac equipment", "electrical panel", "food storage", "network panel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026700.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89237, "question_id": "GLMP2KkQcfpGj8kWkCqxC6", "question": "What does the sign indicate is located ahead?", "choices": ["bus stop", "boats", "trains", "planes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230758, "question_id": "GMdSh6HZ4sSPmiTDbCqhkT", "question": "What kind of transportation is this?", "choices": ["water", "rail", "land", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230758.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243586, "question_id": "GNWtFouW6WcSe6qL2RDLZ8", "question": "What would a weather report say about the weather on this day?", "choices": ["snow", "rain", "overcast", "partly cloudy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243586.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231403, "question_id": "GPF6DqwbhHjZXZUcneczHP", "question": "What would you do if you needed to get to Champion Hill via 29?", "choices": ["make uturn", "sit down", "turn left", "turn right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231403.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4040, "question_id": "GRwubdK3BNtGxa5B8aiSLL", "question": "What type of medical procedure is being done with the equipment?", "choices": ["x-ray", "ultrasound", "cat scan", "mri"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004040.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160379, "question_id": "GSf7NQsdiPJmJysq9yyuRy", "question": "What part of the human body would the item the cat is sniffing be worn on?", "choices": ["feet", "hand", "nose", "head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160379.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175865, "question_id": "GSsEhVXTSTBcGrxj5t35SD", "question": "What can one probably do inside where the photographer is?", "choices": ["buy shoes", "buy ticket", "buy car", "buy clothes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175865.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235951, "question_id": "GVCFdiayy5xAR2Sjbsf3Q4", "question": "What is the man with his hand up wearing on his head?", "choices": ["crown", "fedora", "laurel", "cap"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235951.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525925, "question_id": "GVQmoeMfAqsZCiHV3BwFAP", "question": "What are the people doing?", "choices": ["swimming", "kissing", "dining", "leaving"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191971, "question_id": "GXR5XJHBmFFMDW5sKqE54C", "question": "What color is the barrier all the way to the left?", "choices": ["purple", "red", "green", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454600, "question_id": "GaXYsCKd3fDp7k6ocDM8Ca", "question": "What word begins with the same letter that appears on the sign to the left?", "choices": ["groom", "room", "loom", "broom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399493, "question_id": "GbYo84TddQ3aBA3GTfmpda", "question": "What liquid does this umbrella protect from?", "choices": ["acid", "lava", "water", "alcohol"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399493.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452476, "question_id": "GcCPD56g5oyYYUUMsfGoSm", "question": "Why is the tiger harmless?", "choices": ["baby", "slow", "stuffed", "tired"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139131, "question_id": "GdPLpsQkooPvsLn7yur66T", "question": "When was Motorola founded?", "choices": ["1933", "1928", "1929", "1931"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139131.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329149, "question_id": "GjxTSzowikvHsCvwLPFp4Q", "question": "What word would best describe the skateboarder's stance?", "choices": ["fakie", "closed", "wide", "open"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329149.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 116458, "question_id": "GkLvGNhxp7qECiYgPUCdS8", "question": "In which country is this car presently parked?", "choices": ["mozambique", "usa", "england", "australia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000116458.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 455589, "question_id": "GqJP2smKHrGHgdarXTEnGq", "question": "What is the pasta shape used in this dish called?", "choices": ["shell", "rotini", "penne", "linguine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000455589.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463863, "question_id": "Gqqmtqm3p6dq5VRsv4Mjcu", "question": "What is near the sign on the building?", "choices": ["parrot", "umbrella", "poster", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349484, "question_id": "GrS6wF7dXkvzEtfCDpy6Kj", "question": "What part of this treat is most likely to stain one's clothing?", "choices": ["filling", "crust", "bottom", "crumbs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349484.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 237164, "question_id": "GsaBR7gCKTP9fSVGbuzL7g", "question": "What is microwave frequency range Mcq?", "choices": ["2ghz", "4ghz", "1ghz", "3ghz"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000237164.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524365, "question_id": "GtmtQqtffoo4UJ6vtRh4XJ", "question": "What is the thing on the back of the zebras neck made of?", "choices": ["skin", "muscle", "blood", "hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524365.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86656, "question_id": "GwqDCsGLGposiJDWPjVPbw", "question": "Which female starred in the film with the same name as the boat?", "choices": ["rita hayworth", "ingrid bergman", "bette davis", "katharine hepburn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9089, "question_id": "Gx5RcmVajCL9fjBRntpqVf", "question": "What is this zebra ready to do?", "choices": ["run", "eat", "attack", "hide"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009089.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170894, "question_id": "GxEEYRpLU7YWwjG95Emyft", "question": "What is the person pictured above doing?", "choices": ["diving", "walking", "riding", "surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68533, "question_id": "Gxq6hytDCADafyHbp4ZgU9", "question": "What might you commonly find in the white item?", "choices": ["tinned fish", "tinned beans", "milk", "bread"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 165790, "question_id": "GyhJMfHCLUEqkkYk9TS8rf", "question": "What is on the bed?", "choices": ["blanket", "quilt", "comforter", "sleeping bag"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000165790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 242835, "question_id": "GzmcoPtJtQTMaJUtb4a8JN", "question": "What activity would be feasible to perform in here?", "choices": ["lecturing", "tooth brushing", "gaming", "studying"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000242835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316376, "question_id": "H249gMnhLyEf5aTSw5ezNE", "question": "What company is known for this type of vehicle?", "choices": ["sherman", "tesla", "greyhound", "ibm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316376.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 278406, "question_id": "H2W47j9HGeWLDKT4rKgMwx", "question": "What type of animal is the young girl carrying around?", "choices": ["fox", "cat", "ferret", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000278406.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443897, "question_id": "H2hiHUpgNGKc9Fp692PKAC", "question": "What is the knobby part of the zebra's leg called that sticks out behind the hoof?", "choices": ["knee", "cannon", "fetlock", "pastern"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443897.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86915, "question_id": "H3V4MMjdJWdJLtYCYZ9ThH", "question": "The bus here is powered how?", "choices": ["propane", "diesel", "electricity", "horse drawn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086915.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229044, "question_id": "H4NoH3P5uj97DLVoZFfZsw", "question": "What would happen if someone climbed the pole to retrieve the kite?", "choices": ["electrocution", "flight", "arrest", "success"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59389, "question_id": "H6TxFtGPZzkztarFYqQqxk", "question": "This structure implies that Pluto can do what?", "choices": ["dance", "surf", "cook", "golf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059389.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 564871, "question_id": "H9FoftXFucSgGReTbZuZPt", "question": "What do the benches provide a view of?", "choices": ["trees", "canyons", "water", "mountains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000564871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69414, "question_id": "HAL6iCXPkReNPex65fN7q3", "question": "The cabinet knobs are made from what material?", "choices": ["steel", "wood", "brass", "aluminum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000069414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 312264, "question_id": "HASQgFu9s4zw36CbEPg2XJ", "question": "What kind of flooring is in the room?", "choices": ["vinyl", "shag carpet", "hardwood", "tile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000312264.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547379, "question_id": "HAT2gn3826RcYTG8CcBTsa", "question": "What would the average person do with flowers seen here?", "choices": ["toss them", "press them", "water them", "regift them"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547379.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478095, "question_id": "HBHQL8aLHsYPvqrU3iSvcj", "question": "What is the average lifespan of the animal in the picture?", "choices": ["25 years", "40 years", "10 years", "5 years"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478095.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343103, "question_id": "HCYJEoWsV8JzNLS6YALG6k", "question": "Which part of town is this sign most likely in?", "choices": ["outside town", "central", "uptown", "downtown"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343103.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561024, "question_id": "HCtRhxHcMo3BdyZPjhk5H7", "question": "What are the large white items on the pizza?", "choices": ["cucumbers", "onions", "cheese", "mushrooms"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561024.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518209, "question_id": "HEAGXiTLVSaXZeqQffc3xe", "question": "What does the wall-less structure at the top of the tower contain?", "choices": ["telescope", "radio tower", "bell", "cell tower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 143397, "question_id": "HERjBMbjUNpLMdDCNkA9Un", "question": "What is in the center of the tray?", "choices": ["dog bone", "human food", "cat food", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000143397.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 101521, "question_id": "HHL8isJs2UfomAheauPDkv", "question": "What flowers seeds are visible here?", "choices": ["poppy", "carnation", "radish", "daisy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000101521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50268, "question_id": "HKjtpD9hfHfNGuMkHTrdwS", "question": "What character is the same type of animal as this one?", "choices": ["underdog", "yogi bear", "snoopy", "sonic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050268.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269648, "question_id": "HKyv6KesVrurC6QtYdUSz6", "question": "Who used this brush on the child's hair here?", "choices": ["baby", "stranger", "little sis", "mom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570159, "question_id": "HMKFhVbhuDjBApu95pSi4e", "question": "What type of outfit is the man wearing?", "choices": ["tuxedo", "basketball jersey", "baseball uniform", "wet suit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570159.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289259, "question_id": "HNi4EyQWr38PC96KeNFAmY", "question": "What presents the most danger to the horse?", "choices": ["sun", "birds", "cliff", "man"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289259.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197468, "question_id": "HNo8FhsWGWm4aAcjootgY3", "question": "The person here has what official title?", "choices": ["none", "life guard", "beach comber", "bodyguard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197468.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330989, "question_id": "HPTNTYXNTqqs8rujLMhhM6", "question": "Which parts of the bird is a similar color as the top of it's head?", "choices": ["lower body", "tail", "side", "feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330989.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504935, "question_id": "HQSx8rvh5iLiiHL5dinH9v", "question": "What species of animal was this frisbee meant to be thrown here?", "choices": ["rodent", "feline", "canine", "human"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504935.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369635, "question_id": "HR6Yh5VDaRag4THNujSaAF", "question": "What material is being used to keep the chickens enclosed?", "choices": ["wire", "straw", "rope", "thread"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369635.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339739, "question_id": "HT4o8fz6YM4snFaHxHUr6F", "question": "Someone looking for scouring cleanser might look where in this room?", "choices": ["medicine cabinet", "under sink", "in shower", "on sink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339739.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550859, "question_id": "HTj8X2aSxXcDjCfaK2wfTf", "question": "What activity could not take place here?", "choices": ["biking", "boating", "fishing", "skiing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550859.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91992, "question_id": "HUieAB3omSHeJqrqzHzDLR", "question": "What is the moving plane ready to do?", "choices": ["descend", "ascend", "crash", "flip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091992.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40868, "question_id": "HWJGKE8fCHUN7mTYTzonD8", "question": "What type of clock is shown?", "choices": ["analog", "digital", "cuckoo", "auditory"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040868.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75645, "question_id": "HZta9N4okdb6LsNpgTZYYM", "question": "In which is the the city on the bottom sign located?", "choices": ["washington", "arkansas", "tennessee", "california"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075645.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199069, "question_id": "HaMsDunEqWTWnk6Vrc6hPZ", "question": "What is preventing the person's hand from getting dirty?", "choices": ["napkin", "non-messy food", "paper bag", "plastic bag"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 150881, "question_id": "HaegBHnodChMrvoKZjQQtp", "question": "What does this animal like to eat?", "choices": ["plants", "cows", "fish", "berries"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000150881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52054, "question_id": "Hf9aiXDWiXHyoo6nTwBRMk", "question": "If there is a fire in this area where should one look to set off the alarm?", "choices": ["floor", "upper wall", "door", "floor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 312699, "question_id": "Hfy78FU8a7NG39NbEsaACA", "question": "Which of the following foods cannot be obtained from this animals in the picture above?", "choices": ["all above", "milk", "eggs", "meat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000312699.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358508, "question_id": "HgEdNJ7NKj4Uf4hTUmjgRA", "question": "What is the first name of a very famous player of this game?", "choices": ["tina", "josephine", "emily", "serena"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97570, "question_id": "HgWCTfrVxyjvKZ3gvc42E9", "question": "Where are the helmeted men located in relation to the images shown?", "choices": ["before photographer", "behind photographer", "no where", "photographer's left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000097570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367455, "question_id": "Hii3iZx4Zf2D3dbjiKYJe8", "question": "What task is this person involved in?", "choices": ["fruit picking", "phone calling", "bird killing", "photography"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367455.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115708, "question_id": "Hkdz3pX2hU3pCNzS3pUECx", "question": "What purple vegetable is this sandwich topped with?", "choices": ["none", "beets", "eggplant", "cabbage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163786, "question_id": "HmZcanTeGmjzqbjXZgHxuL", "question": "The board that he is in the water lying on is most likely what kind?", "choices": ["paddleboard", "waterboard", "surfboard", "bodyboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163786.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295781, "question_id": "HorQNoHTcUPsjAPhV3H6pa", "question": "How does this person feel about this wave?", "choices": ["disappointed", "ecstatic", "stoked", "hopeful"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000295781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541862, "question_id": "HqvZzKLV9h9kVfMpujeFMj", "question": "This vehicle runs with what power?", "choices": ["engines", "electricity", "propellers", "solar power"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000541862.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93076, "question_id": "HuXsPRVQfRarGSPHALx6XL", "question": "What animal is the stuffed animal?", "choices": ["dog", "cow", "horse", "pig"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475487, "question_id": "Hur3K4JmLnx53T9U5uqQN5", "question": "Where is the child likely seated while eating the banana?", "choices": ["stroller", "bench", "lap", "highchair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000475487.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 496540, "question_id": "HuroHen2qtZLc5ZMGMDh56", "question": "Why does the elephant have no tusks?", "choices": ["lost them", "sold them", "is mutation", "very young"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000496540.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556446, "question_id": "Hv7R36hX5TELo3h4yvnKne", "question": "At what kind of transportation is this clock located?", "choices": ["airport", "subway", "train station", "bus terminal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556446.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186872, "question_id": "HvWUv2UZ3FFHvk3vJQG3aq", "question": "How much is a ticket to ride this bus?", "choices": ["free", "10 dollars", "1 dollar", "5 dollars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186872.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473471, "question_id": "HwqHKftQgjwcxPEAWnqngg", "question": "What does the object in the woman's hand provide for people?", "choices": ["calculator", "ruler", "time", "weight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473471.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15912, "question_id": "HyY8q7i9U4J2Y99yRj6vAg", "question": "What is the white item called?", "choices": ["molar", "powder", "sugar", "tusk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015912.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50237, "question_id": "HyeFPVsPWvy2vnwfXogdt5", "question": "What kind of institution would be an advanced from of the location seen here?", "choices": ["mall", "museum", "supermarket", "college"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90153, "question_id": "Hz8AuBesPRyqbUhPdwLPir", "question": "The animal's weight is most likely in what measurement?", "choices": ["tons", "pounds", "micrograms", "liters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090153.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93628, "question_id": "J3gFQuo5wKHFoJ25iXBSsz", "question": "What does the yellow sign mean to motorists?", "choices": ["yield", "no parking", "roundabout", "stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093628.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433092, "question_id": "J6PFYmmzdx4FEsumUeKfev", "question": "What does the white broken lines in the middle of the road mean?", "choices": ["no overtake", "decoration", "pedestrian crossing", "can overtake"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000433092.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570433, "question_id": "J6SqYCyRUrydK54gDsgksG", "question": "Where did this person move from to make this move?", "choices": ["platform", "tall fence", "short fence", "ground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570433.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254120, "question_id": "J76mtRKa8kz7FBUKkYt5gz", "question": "These animals represent what type of animal?", "choices": ["equine", "canine", "feline", "avian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 363626, "question_id": "J9BGEgEkG9W4WgZNhmjnjJ", "question": "This sport requires a strong what?", "choices": ["personality", "upper body", "lower body", "bite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000363626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544572, "question_id": "J9jUNapLVGAskHTDxArj9r", "question": "What kind of chemical compound produced while running this bike?", "choices": ["carbon", "nitrogen", "cfc", "hcfc"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544572.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292479, "question_id": "JA4DdJPhVMHnYq5uYmX9XP", "question": "What are the giraffes standing on?", "choices": ["carpet", "asphalt", "grass", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000292479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267741, "question_id": "JBoVfuSqUupbyQBBQEZUex", "question": "What type of animal is located inside the cage?", "choices": ["dog", "cat", "kangaroo", "hamster"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267741.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205165, "question_id": "JD2eA7UNVfheaZX3sNARCk", "question": "Where is the car headed?", "choices": ["home", "for water", "repair shop", "virginia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205165.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345424, "question_id": "JDPYxkr6na4nSFXnor9QeF", "question": "This dog's owner plays a game that involves what?", "choices": ["basket", "net", "dessert", "chess"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345424.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407937, "question_id": "JERtcd8Jt5AazWGt5Tkbp4", "question": "Why would he have difficulty using this device?", "choices": ["can't walk", "no hands", "no eyes", "can't talk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000407937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576769, "question_id": "JFbvDYzbxMNWBSrSQfSucJ", "question": "The top half of the outfit is usually worn when?", "choices": ["corporate meeting", "beach party", "wedding", "graduation"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000576769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96635, "question_id": "JFtxswCfn4MMyikD9EFimE", "question": "What material is the floor made of?", "choices": ["tiles", "wood", "carpet", "vinyl"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096635.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403493, "question_id": "JHK7nzmku8deh8RtW4zUg7", "question": "Which activity is being displayed in the mirror here?", "choices": ["selfie-taking", "flossing", "web browsing", "phone buying"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403493.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506340, "question_id": "JHzJAEQBz9rZckuQVMWKua", "question": "Which direction are the skiers shown here going?", "choices": ["upwards", "nowhere", "sideways", "down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498240, "question_id": "JKsLfTjJBkSmbkmdCJFXP6", "question": "What is this type of bus called?", "choices": ["shuttle", "double decker", "mini", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498240.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439478, "question_id": "JPq3sgtSMU37TzkUZTKEPa", "question": "What food habit the zebra's had?", "choices": ["none", "herbivores", "carnivores", "omnivores"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439478.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346306, "question_id": "JPrWzGXVbuyXm2ZbHFkjPk", "question": "A player on this team would be of a different what?", "choices": ["weight", "race", "height", "gender"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 481263, "question_id": "JR7iarit8Wmr2wC2WzgEoQ", "question": "What animal is seen on the person's shoes?", "choices": ["bird", "lobster", "puma", "crab"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000481263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114243, "question_id": "JT9zy4rNq8Jke46sC6iybm", "question": "What body part is usually used to turn the TV off by this person?", "choices": ["voice", "whole hand", "thumb", "pinkie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29636, "question_id": "JULZaKJxyx7KrW63UdKF8m", "question": "Where would you not find this animal?", "choices": ["estuary", "river", "beach", "inner city"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029636.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420439, "question_id": "JUxiXcVk8j5dJbsuTc4pMV", "question": "What might you find in the silver thing on the side?", "choices": ["coffee", "cereal", "eggs", "petrol"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420439.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317314, "question_id": "JVQeTQWM9hFnGCnyec4jWK", "question": "What is near the large gray item?", "choices": ["flowers", "cat", "dog", "umbrella"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317314.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435826, "question_id": "JWJsmAjoMmnj6VQeG9Mphe", "question": "What feature of this area favors this activity?", "choices": ["clean water", "deep water", "big waves", "quiet environment"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435826.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472653, "question_id": "JX6Ke6cUhmLaanDZEhJZU6", "question": "Why is this pie cut into pieces?", "choices": ["individual servings", "easier discarding", "looks better", "for sale"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000472653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351694, "question_id": "JXjefcH3T2Li7ED4a9LfVM", "question": "The person here wants to utilize what for movement?", "choices": ["ship", "oars", "wind", "whales"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351694.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209696, "question_id": "JZhnSjSbALtCGRbK93LvNs", "question": "The wires are there to monitor what?", "choices": ["vitals", "eating", "happiness", "growth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209696.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187956, "question_id": "JZtCEEVWqJHQeMvAnZJbKm", "question": "What is the name of the two prominent protrusions?", "choices": ["mandibles", "antlers", "teeth", "horns"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000187956.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373448, "question_id": "JZyrQiSb5yybsAzzQh774k", "question": "What type of bus is this?", "choices": ["tour", "school", "prison", "commuter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373448.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52888, "question_id": "JaSaGyaQFq8JWsiVLyTr83", "question": "What is the horse riding on?", "choices": ["track", "water", "beach", "obstacle course"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052888.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265727, "question_id": "JbfSUHDHQHKtWkwTn5mYer", "question": "Horses have passes this way at least how many times before?", "choices": ["five", "seven", "none", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265727.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397869, "question_id": "Je4a8Bqyj7jU5Q73Hs7RTs", "question": "What kind of shorts is the man in the middle wearing?", "choices": ["bicycle shorts", "cargo shorts", "board shorts", "gym shorts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397869.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286750, "question_id": "JfzqF9vqCadAY6afRzpmM5", "question": "What us needed for the thing the man is grabbing to move him along the waters?", "choices": ["electricity", "wind", "gravity", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286750.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89626, "question_id": "JgrAVkxH5EsaUFfjrH3fuA", "question": "What is the top of the wave called?", "choices": ["foam", "top", "trough", "crest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362833, "question_id": "JiHoPRcZwU6rjeoGW67HKj", "question": "What is the probability the cameraman will catch the Frisbee coming at him?", "choices": ["very high", "high", "medium", "low"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362833.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478183, "question_id": "JivVsXQGPeWoyf2RH2ZdUa", "question": "What is the person on top of?", "choices": ["horse", "hammock", "ramp", "bench"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478183.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63267, "question_id": "JjRivrqC8WxPX2fUKqJZgH", "question": "What animals are shown in the photo?", "choices": ["ram", "rhino", "pig", "horse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000063267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209790, "question_id": "JkHnZCGitwjbWgs2zJ7KzW", "question": "What living shape does the red thing on the pole represent?", "choices": ["dogs", "cats", "humans", "tigers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542392, "question_id": "JmCzsnJZJwRupspUYy6VMq", "question": "What was this actresses middle name?", "choices": ["leni", "susan", "grace", "michelle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180290, "question_id": "JoDTw2DKPDSySJwi2L88qu", "question": "What topping is on the pizza?", "choices": ["pepper", "noodle", "butter", "mushroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180290.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453608, "question_id": "JpsQrd3qXt27FkbSfxLMKq", "question": "Why is there a mirror above the sink?", "choices": ["helps shaving", "see behind", "keeps clean", "is random"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000453608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546069, "question_id": "JqmfppZG2ABN55UnwMJcRn", "question": "What does the man want to do with his right hand now?", "choices": ["throw frisbee", "slap someone", "sit down", "catch frisbee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298104, "question_id": "JrGxKYxZF3U4KBLvr7u7Wy", "question": "What is required for this activity?", "choices": ["water", "snow", "sun", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329924, "question_id": "JsaQYTgpBxdNQGzdKwwzUh", "question": "What office would one visit to take advantage of this offer?", "choices": ["lawyer's", "janitor", "leasing", "doctor's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329924.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44529, "question_id": "JutGAUzQGgMTxvEvsThznE", "question": "The top of the muffins have been season with which spice?", "choices": ["ginger", "vanilla", "cloves", "cinnamon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044529.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37892, "question_id": "Jx2CbUGHVVB3fH2hZeMSxf", "question": "How many people probably share this bathroom?", "choices": ["one", "two", "three", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130089, "question_id": "K2Ffn4pXSzY3cdJX4543dK", "question": "What would people generally not do in this room?", "choices": ["brush teeth", "bathe", "cook", "shower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000130089.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320475, "question_id": "K2U7xXN8VzDBRFtGhxqMr5", "question": "What is the girl holding in the image?", "choices": ["bowling ball", "baseball bat", "tennis racket", "basketball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420760, "question_id": "K34DqdizbH4ZugqBdfeAyZ", "question": "Upon which high flying conveyance might the person who owns this luggage travel soon?", "choices": ["train", "flying bike", "airplane", "surfboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195001, "question_id": "K48xfHS5FcKVHkX5FQA3DK", "question": "What type of lane is the cameraman in?", "choices": ["slow lane", "speed lane", "carpool lane", "bike lane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195001.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93265, "question_id": "K5s4tZVbgFGpmg2EpKygGn", "question": "What is the same color as the woman's pants?", "choices": ["onions", "relish", "ketchup", "mustard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3486, "question_id": "K6EDK7JN6MtDF52ERE2nRZ", "question": "Where would the animal depicted by the kite normally live?", "choices": ["water", "sky", "land", "underground"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003486.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404574, "question_id": "K8Qzbz7sDNWfJQEHYSPUjz", "question": "What is the name given to the above?", "choices": ["encyclopedia", "dictionary", "story book", "bible"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404574.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 370734, "question_id": "K8RqfzrjjKA2wgpEuCcubR", "question": "What was the first item ever sold under this brand name?", "choices": ["washing machine", "sewing machine", "stove", "dishwasher"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000370734.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525846, "question_id": "K969qkCMspR4pCVKWZn6S4", "question": "What is the truck in this image commonly used for?", "choices": ["animals", "tools", "moving", "painting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525846.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175614, "question_id": "KAsPGAXTeFnZkBhtQrceKU", "question": "What show was related to the word on the boat?", "choices": ["house", "seinfeld", "baywatch", "friends"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175614.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414842, "question_id": "KB77vgibBUmSM7iTT7LWaW", "question": "What are these visitors looking through in Paris France?", "choices": ["clock", "telescope", "window", "bridge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414842.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326150, "question_id": "KCC4ZK6NKvbYfjqYM3PhAg", "question": "What is the train above?", "choices": ["parking lot", "highway", "side street", "intersection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176611, "question_id": "KD69fUWYri7WcvWxqBWNSz", "question": "Which one of these items is in the same family as the food he's eating?", "choices": ["almond", "plantain", "potato", "coconut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000176611.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575169, "question_id": "KDWuPBhxhqECRsqNgteydQ", "question": "What is it called when she falls off?", "choices": ["wipeout", "slip", "digger", "fall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575169.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453572, "question_id": "KEjnRdFJkyEUiv93L2F453", "question": "What type of road structure is indicated by these signs?", "choices": ["ramp", "bridge", "intersection", "interstate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453572.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271062, "question_id": "KFFktkMirnLATrodLYNXmu", "question": "What is the bear supposed to be?", "choices": ["soldier", "punk rocker", "cop", "doctor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204169, "question_id": "KHCmoKUrTMvmDVVhLxhhFA", "question": "What is tied to the man's leg?", "choices": ["life preserver", "phone", "surfboard", "camera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204169.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252767, "question_id": "KHcbDA6tpeMjo6prdF84QT", "question": "What could the round item in his hand be used for?", "choices": ["storage", "photography", "phone call", "riding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125611, "question_id": "KHhVmuDGyxuQLehzU87Rvf", "question": "What might the horns here be used for?", "choices": ["midnight music", "personal calls", "decoration", "tornado warning"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000125611.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 531352, "question_id": "KJEi4Z4JVELRVGuBo5uqTH", "question": "What is being used to portray eyes in this food display?", "choices": ["sprinkles", "nuts", "frosting", "doughnut holes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000531352.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114654, "question_id": "KLFN8UMf2VC9q92Rg5xYTr", "question": "What camera effect creates this result?", "choices": ["film", "power", "video", "flash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114654.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560932, "question_id": "KMFDztbFxmWJvHaqt3Zw9J", "question": "Where is the animal on the side of the bus usually found?", "choices": ["sky", "igloo", "cave", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419071, "question_id": "KNyJhG4b4bs5mAJhFoCKxu", "question": "In who's briefcase does this cat curl up?", "choices": ["hers", "mailman", "owner", "dog walker"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000419071.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2922, "question_id": "KP95JBYXQLfcvPrkyibvfn", "question": "What are the posts of the fence made of?", "choices": ["steel", "paper", "plastic", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373394, "question_id": "KPRh8nGKU8uV6xHNi256KC", "question": "What is the surface of this road?", "choices": ["stone", "dirt", "asphalt", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369070, "question_id": "KQdYFpDHVkdFhowrNFhhDF", "question": "What is the dog expecting to come his way?", "choices": ["treat", "man", "cat", "frisbee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4440, "question_id": "KULPUWpxspBQowNTu9gKTy", "question": "What is required for this activity?", "choices": ["rain", "snow", "ice", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004440.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182823, "question_id": "KULUACwvS7bdyDwahne9rQ", "question": "Why is he skiing uphill?", "choices": ["lost", "confused", "more fun", "exercise"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000182823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391017, "question_id": "KUR3JDqXJyzYK8PUZwqHso", "question": "How has this food been prepared for serving?", "choices": ["scooped", "poured", "sliced", "diced"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391017.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512221, "question_id": "KW4tgB4UJvGL4WLZhGsn63", "question": "What is this animal called if you add wings to it?", "choices": ["pegasus", "beagle", "basilisk", "chimera"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519615, "question_id": "KXkX7jEJcQuJeFXoay9uf4", "question": "What is most likely causing the picture to be out of focus?", "choices": ["faulty equipment", "wind", "old camera", "rain"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000519615.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553084, "question_id": "KYQGJcEkRruHJDGETf7Xdh", "question": "What is causing the man to become airborne in the water?", "choices": ["wave", "ripple", "speed", "wake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000553084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391129, "question_id": "KYcT5E4KjoJ8gGvzYU5Mun", "question": "What are the stripes on the shirt called?", "choices": ["pinstripes", "hearts", "lines", "polka dots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391129.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232803, "question_id": "Kc65eAVSXy7RXYM8zbpD94", "question": "Which produce item has the most potassium?", "choices": ["orange", "kiwi", "strawberry", "banana"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232803.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84027, "question_id": "KdEQmedscGAZhcNcKcr3QC", "question": "What is usually the same color as the flower?", "choices": ["lime", "blueberry", "lemon", "strawberry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084027.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273221, "question_id": "KdqeMFmPbeARfsLBpfCRBj", "question": "The sponsor of this tournament specializes in what field?", "choices": ["electronics", "banking", "medicine", "clothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569394, "question_id": "KeAAu3By98CpuUb9obr46L", "question": "What body of water is visible here?", "choices": ["pond", "great lake", "ocean", "river"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199693, "question_id": "KfptCuavvCTqFBfTu3VeYQ", "question": "What kind of structure is this?", "choices": ["skyscraper", "house", "strip mall", "tower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199693.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224538, "question_id": "KhvvrDdZR85cuZMytmb4qM", "question": "What facilities hallway is visible here?", "choices": ["hospital", "office building", "prison", "motel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418669, "question_id": "KojbQjhrGGikSYpf4HGYnb", "question": "What is the yellow vegetable called?", "choices": ["lemon", "corn", "pepper", "yellow squash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418669.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8013, "question_id": "Koo569TQfzKx5rWNtT47yk", "question": "What type of vehicle is on the runway?", "choices": ["airplane", "helicopter", "jet ski", "yacht"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 502521, "question_id": "KotC4VTn8oRxrrUe9Xgmdi", "question": "What two letters are covered up by the stickers?", "choices": ["op", "cp", "cr", "or"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000502521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488610, "question_id": "Kp7vcoRb6NgMMtxbNfjGCB", "question": "The bag is located where?", "choices": ["chair", "right hand", "left hand", "table"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000488610.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221906, "question_id": "KpohZMuJcFzxMnS2cud4uc", "question": "What liquid is the dog smelling at the bottom of the pole on the sidewalk?", "choices": ["saliva", "soda", "water", "urine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158904, "question_id": "KrvnHBnABkEnq3z2FfRsom", "question": "What is the temperature tolerance limit of this time of flower in Fahrenheit?", "choices": ["62", "29", "50", "36"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468427, "question_id": "KtPX9ndWLs7ECh45xMt7q5", "question": "What is the nickname for the ball?", "choices": ["pigskin", "hogskin", "whaleskin", "bearskin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468427.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581429, "question_id": "KwdoFhYigk2ttTTkDy88Wp", "question": "The large animal near the small one here is related how?", "choices": ["enemy", "father", "mother", "sibling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186110, "question_id": "KyAkQm3ChEpxQvzjS6FSZq", "question": "What is the animal most likely looking for?", "choices": ["honey", "hay", "lions", "tigers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186110.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494441, "question_id": "KyMcRi9KrMGeVHiBdrF8MN", "question": "What can be said about the interaction of these animals?", "choices": ["dominant", "symbiotic", "friendly", "hostile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494441.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308068, "question_id": "KydC2ZmNVQZwbc8o5cPzsC", "question": "What material are the lamp posts on the bridge constructed from?", "choices": ["copper", "steel", "aluminum", "brass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308068.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223249, "question_id": "L3mnSsgcuZKVMaEiyCCyMF", "question": "To what degree does the child like her gift?", "choices": ["medium", "high", "very low", "low"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180864, "question_id": "L6AkFS6yMtUHTjkuFrt5TE", "question": "If an insect is irritating this animals head what body part might they first move to dislodge it?", "choices": ["ears", "hoof", "tail", "belly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317708, "question_id": "L7UyattCiat546NmptgihT", "question": "What are the object between the urinals for?", "choices": ["count users", "privacy", "splash guards", "design"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 13656, "question_id": "LArPErWeX5iodpQYXsresm", "question": "What does the white basket on the small tree most likely have inside of it?", "choices": ["giraffe food", "bird feed", "hunter bait", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000013656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 526303, "question_id": "LDatibRNGb6EDcRu9WX6TV", "question": "What is the bear on the left holding?", "choices": ["skull", "candy cane", "heart", "ham"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000526303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162120, "question_id": "LEWbicSBBG6kNqJAptq4Vr", "question": "What does this vehicle need to run on?", "choices": ["rails", "cables", "water", "roads"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525291, "question_id": "LJCeXWREnsZgEGm2Q83tjw", "question": "What thing might hold the board to this man's body?", "choices": ["rope", "drone", "air", "weld"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525291.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230816, "question_id": "LJsgspJqewKYq7aU2wypeK", "question": "What type of buildings are shown?", "choices": ["shed", "home", "barn", "skyscraper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230816.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47677, "question_id": "LKZhnDMdKYF7f3xA5uJ2ZZ", "question": "What is the man sitting down doing?", "choices": ["signalling help", "rowing", "suntanning", "relaxing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000047677.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182116, "question_id": "LKq7G6AiFwEHwddamC5LCX", "question": "What is being used to hold back her hair?", "choices": ["tape", "paper", "headband", "paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000182116.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347914, "question_id": "LLZEJThTwHDRJPheXUtpnC", "question": "What's wrong with this picture?", "choices": ["stolen toothbrush", "abandoned child", "looking away", "no teeth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514610, "question_id": "LLsfH9Jv865HzAok8CFskq", "question": "What might the dog do if you grab their dish right now?", "choices": ["beg", "lick you", "bite you", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514610.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334047, "question_id": "LNNAgnEL254UNWj6ohi69f", "question": "What type of energy is needed to move this surfboard from this person?", "choices": ["kinetic", "potential", "electrical", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334047.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574694, "question_id": "LNi5zEngEpeL2AebkUs4RP", "question": "What type of area is this giraffe located in?", "choices": ["tropical", "shrubland", "mountains", "arctic"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000574694.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344571, "question_id": "LNmhzZJg4MVtGXYvWBh5fP", "question": "What branch of the military is the person holding the phone in?", "choices": ["navy", "marines", "coast guard", "army"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543746, "question_id": "LRD8G9dvfRfvBZNACtSeds", "question": "What is the general landscape of the background of this image?", "choices": ["savannah", "mountains", "tundra", "rainforest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543746.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540188, "question_id": "LSBBetjhmPN8ycjdQ9GnGC", "question": "What is the largest appliance used for?", "choices": ["washing", "cooking", "cooling", "calling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540188.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403945, "question_id": "LTgNaW5wqXEFffkmBvLiR7", "question": "What kind of tree is the most yellow item shown here grown on?", "choices": ["lemon", "cat", "rhubarb", "tulip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403945.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518087, "question_id": "LUBHjzRSjNvGbjabgRBNh9", "question": "What dessert can be seen in the container?", "choices": ["cupcakes", "doughnuts", "brownies", "cookies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291409, "question_id": "LVkrzpdU39GFC9Zntp8H4s", "question": "What is the man using the tool to turn on the hydrant?", "choices": ["handle bar", "chains", "hose", "stem nut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291409.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436210, "question_id": "LWwQy8Po7mG7qkr6NpX8uz", "question": "Which species is mentioned in this plane's name?", "choices": ["canine", "equus", "rodentia", "feline"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436210.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163380, "question_id": "LXrUirZ84rHuQShXLnoSuK", "question": "What is the man trying to do?", "choices": ["clean", "run", "cook", "talk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163380.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 405783, "question_id": "LZwFzvZPNz9zmFZpcZiWnx", "question": "What venue is shown in the picture?", "choices": ["bedroom", "hotel room", "dinning room", "living room"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000405783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50830, "question_id": "LbHCSsZGbJYLdTGfSbdpUx", "question": "In which state is the boat seen here?", "choices": ["salvaged", "sunk", "water logged", "dry dock"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000050830.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93627, "question_id": "LcSS3m2bwWFk6krEeQWiSL", "question": "What would be the most dangerous thing to drag a finger along?", "choices": ["food", "liquid", "handle", "blade"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093627.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 217508, "question_id": "LcV5ooSKbJUMkNnPcQeEHo", "question": "Who is this cake designed for?", "choices": ["boss", "child", "father", "bride"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000217508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115411, "question_id": "LcVSKmZRkKGwjxFsAmuRTv", "question": "Where does this animal like to play?", "choices": ["ocean", "boxes", "sky", "volcano"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115411.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506086, "question_id": "LfowUgRztRm7uk2SrCRf56", "question": "What is the brown area ahead of the animals on land made from?", "choices": ["grass", "straw", "sand", "stone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506086.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138474, "question_id": "LhXC9GVFdhwetVfsmUvfbS", "question": "What is not on the pizza?", "choices": ["cheese", "doe", "chocolate", "meat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 228430, "question_id": "LhgTn63Bs3KZo8yvhPgm47", "question": "Where are these animals located?", "choices": ["barn", "pasture", "desert", "forest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000228430.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576906, "question_id": "LjDFhSdBsDQar36r2CcYwe", "question": "What type of rail car is behind the engine?", "choices": ["caboose", "hopper", "tank", "flat car"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000576906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265653, "question_id": "LjSD8k2SnWzE9W9mrmbmcE", "question": "What fruit is shown here?", "choices": ["pear", "apple", "orange", "grapefruit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142338, "question_id": "Lkbd4LXsdqPothDEqkRXCh", "question": "What does the tags on the ears provide?", "choices": ["identification", "bug repellent", "ear protection", "fashion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142338.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320536, "question_id": "LntYvUaDRTovoZEEdWNQwg", "question": "The base color of the animal without the stripes is what?", "choices": ["pink", "white", "black", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320536.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5382, "question_id": "LpbgVozrcCKtRowMin2h8m", "question": "Skateboard is made up of which wood?", "choices": ["ash", "redwood", "pine", "maple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005382.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 531064, "question_id": "LtV4t9WZXivK4izKTW5bSu", "question": "Where is this toilet located?", "choices": ["store", "outhouse", "mall", "home"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000531064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202771, "question_id": "LutYKKkZmC7cdpKBTwRBLm", "question": "What are the people riding in the water?", "choices": ["canoe", "boat", "surfboard", "jet ski"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202771.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119660, "question_id": "LxtfRcjz3hWMq9urLsmNPR", "question": "What pattern is on the hat?", "choices": ["zigzags", "stripes", "dots", "checkers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 274207, "question_id": "M2hm7Z5oC7riGsnb5F2aQv", "question": "What is this appliance used for?", "choices": ["calling", "cooling", "cooking", "watching"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000274207.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 140715, "question_id": "M2nuCqouqRsmaSgGJp8SJW", "question": "What is unusual about the placement of these street signs?", "choices": ["amount", "color", "shape", "size"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000140715.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576271, "question_id": "M3n6xUsztXRLXzAufheA6v", "question": "What is the most common zebra breed?", "choices": ["maneless", "plains", "crawshay", "chapman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576271.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445807, "question_id": "M4fyLvbRComrjPbmvsGaNe", "question": "What us the last letter in the name?", "choices": ["t", "l", "g", "s"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000445807.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82651, "question_id": "M5762NZdeVtn6HAZxvMSos", "question": "Why is he wearing head to toe wet suit?", "choices": ["cleaner board", "floats better", "stay dry", "cold water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082651.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 143892, "question_id": "M6LLWLWwyt39Qyx5rztUia", "question": "The person here likes what?", "choices": ["dictionaries", "novels", "moon travel", "non fiction"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000143892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319138, "question_id": "M6vSw7MBKh3UCR5LxhuLAk", "question": "What is most likely the weather outside in this image?", "choices": ["cold", "snowing", "rainy", "humid"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000319138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264641, "question_id": "M7G4oMF4ZaQW7TsVCqih6v", "question": "What item is the cat sleeping on?", "choices": ["human", "pillow", "hammock", "sweater"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000264641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24083, "question_id": "M8vaCdAXkKHTEosySDvLxk", "question": "What winter activity is the person participating in?", "choices": ["ice skating", "skiing", "ice hockey", "snowboarding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024083.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420430, "question_id": "M9x5RmrzWMcrQY5nqtzBZw", "question": "What type of shower enclosure is in the picture?", "choices": ["rectangular", "half-circle", "semi-oval", "square"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420430.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487490, "question_id": "MCiRMdjyUxekPJ7EqjmNmm", "question": "How was this wood transported to this site?", "choices": ["by sea", "on horseback", "uber", "wood truck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487490.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473033, "question_id": "MCsUiMGyBP4xnUr5CUEnTD", "question": "What zodiac sign is in the month that is near the small hand pointing to 9?", "choices": ["aries", "gemini", "scorpio", "capricorn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473033.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428070, "question_id": "MCsYRUvwvAdhsG2yr5betE", "question": "Which animal is more likely to bite the other?", "choices": ["none", "sheep", "zebra", "gnu"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 248947, "question_id": "MDNuMySjKeBhDA7PrvMxEA", "question": "What move is this player going to make?", "choices": ["forehand", "serve", "lob", "backhand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000248947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390521, "question_id": "MEjXecbUc9uRf4Au8qwAbp", "question": "The white section of the cake means what has been added?", "choices": ["glaze", "sugar", "salt", "cheese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478414, "question_id": "MGmZoN4RvT6xbemNWqk96M", "question": "What might the metallic item do to the things in the container?", "choices": ["pulverize", "harden", "stir", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170011, "question_id": "MJ7Y8iYoZJuRspapYawfEJ", "question": "What is the closes bear doing?", "choices": ["feeding", "building nest", "posing", "hiding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170011.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253067, "question_id": "MJR7K5iuC5RzYtwp5t23ep", "question": "What happened to the cat?", "choices": ["was sleeping", "was startled", "was leaving", "was caught"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000253067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581764, "question_id": "MJe6vF3KE7Lan2W9FaaHGA", "question": "What happened to this sink?", "choices": ["implosion", "crack", "fire", "flood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581764.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52696, "question_id": "MNkNZiHMxTrU3hzypkMFn5", "question": "Why is one urinal lower than the others?", "choices": ["child urinal", "decoration", "construction error", "broken"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052696.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575085, "question_id": "MPN6FFExqgqzaKjjC4oRMT", "question": "Why is the water so clear?", "choices": ["ammonia", "oxygen", "chlorine", "salt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 361473, "question_id": "MPwexPGw4NVXgihuso2iaB", "question": "What is the job of this dog?", "choices": ["carry", "herd", "fight", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000361473.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485448, "question_id": "MQnoGSKa9txNCMgMFnX5Up", "question": "Where can you find this type of animals?", "choices": ["national park", "people's park", "city", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485448.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389893, "question_id": "MUGLiZh5fegjh3zPiAdMrb", "question": "What is the snowboarder doing?", "choices": ["trick", "gliding", "resting", "falling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389893.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310624, "question_id": "MUiuKRpdbh7XT76FV9tHTz", "question": "Why would someone sit at this table?", "choices": ["to sew", "to work", "to eat", "to paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310624.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5448, "question_id": "MVCD2Ca9qNccMEGqUhos5t", "question": "How has this food been prepared to be served?", "choices": ["scooped", "diced", "sliced", "cubed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005448.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38260, "question_id": "MW8AD6AXyNdhwsYQmvU7Vb", "question": "The excerpt shown is from what kind of book?", "choices": ["bible", "encyclopedia", "thesaurus", "dictionary"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038260.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271242, "question_id": "MWcxXhZnUnnGa426KwoYyz", "question": "What are the round objects used for?", "choices": ["cutting", "gluing", "coloring", "stamping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271242.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438949, "question_id": "MY3eJnpnJ8U6dQrqsc4dBb", "question": "What is between the cow on the ground and the fence?", "choices": ["bowling ball", "rake", "cat", "tether"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438949.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145156, "question_id": "MYnGS9qfwmEto3m5A37q4f", "question": "What is the function of the white appliance?", "choices": ["clean", "call", "cool", "cook"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145156.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450978, "question_id": "MZA6qmda5uqtkpakb8pisF", "question": "What type of bottles are shown?", "choices": ["glass", "metal", "plastic", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000450978.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342053, "question_id": "MZL2x7Mvup8sggTbB97nfb", "question": "Why is he facing away from the camera?", "choices": ["rodding house", "hiding", "examining books", "ashamed"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000342053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264576, "question_id": "MbPgqs3cdiCpXj8s29o7d3", "question": "What is behind the surfer?", "choices": ["car", "alien", "bus", "wave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000264576.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474573, "question_id": "MbYCeLHN2GBrq6ovmQF5Fv", "question": "What is in the dogs hair?", "choices": ["gum", "ponytail", "braids", "hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 541220, "question_id": "MbapeCBwr35xdYqMaPbckY", "question": "What does it look like is inside of the crust?", "choices": ["salami", "cucumber", "linguini", "salmon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000541220.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439137, "question_id": "MbrZQMHm9r9bTsjGCNbkaP", "question": "What kind of flooring is in the bathroom?", "choices": ["hard wood", "linoleum", "carpet", "tile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439137.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56008, "question_id": "MdWbo986A7A6UPRrRhjxwi", "question": "Why are the pies being placed on the racks?", "choices": ["to cool", "to heat", "to sell", "to cook"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253340, "question_id": "MeX5UpRNxgRqjM8hAuPD2z", "question": "What item in this picture is a noun as well as a verb?", "choices": ["pink item", "green item", "red item", "yellow item"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000253340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454518, "question_id": "MefjAqAzXpGKdvvSEbcmPA", "question": "Where does he seem to be stacking pretzels?", "choices": ["cafeteria", "kitchen", "restaurant", "fair"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000454518.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412514, "question_id": "MgsjgTmPGvBhc2KJkomAYA", "question": "The woman is seemingly cuddling with what electronic item?", "choices": ["tv", "laptop", "projector", "tablet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272567, "question_id": "MkfPTDCzHa92cDoabsQ48G", "question": "What activity is the person wearing the least clothing here enjoying?", "choices": ["frisbee throwing", "picnicing", "tennis", "sunbathing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272567.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276598, "question_id": "MkyNSd3YBLbWfwimQWDizv", "question": "What is the model of this bus?", "choices": ["top open", "double decker", "single", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276598.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354240, "question_id": "MkyniCFJ7HSyv8Kwn8z4zc", "question": "What word does the figure above when wet stand for?", "choices": ["dangerous", "fun", "slippery", "clean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354240.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485641, "question_id": "MmPHJDKGpiZcYs9MoZvw36", "question": "What is the green vegetable in this pie?", "choices": ["spinach", "carrot", "capsicum", "lettuce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85269, "question_id": "MnsvZV9qUKsZn6coH5ehEt", "question": "How much water do the flowers require per day?", "choices": ["none", "1 ounce", "3 ounces", "8 ounces"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000085269.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77358, "question_id": "MnuFwZnZBFuFTh7ycwNNW9", "question": "How many zoological classification does elephant consists?", "choices": ["five", "seven", "three", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 253610, "question_id": "MoifBAGGhKUCeex4NUfpXH", "question": "What is on the bottom right?", "choices": ["umbrella", "sword", "drawing", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000253610.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391878, "question_id": "Mpi9iSfGpDcEw5fYBj3hYk", "question": "What is the time displayed on the public clock above?", "choices": ["1028 pm", "557 am", "557 pm", "1028 am"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391878.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30075, "question_id": "MqWjp9eoeZCpKDm6ihQaJi", "question": "What is unique about the animals above?", "choices": ["long neck", "herbivores", "are wild", "mammals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030075.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123557, "question_id": "MqbB866otd5Xkz8uYL2Udk", "question": "What has caused her hair to be in her face?", "choices": ["wind", "hairspray", "gel", "pins"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167716, "question_id": "MrTYjyGLK4eLmMfrdhpBhs", "question": "The large animal is related to the small one in what way?", "choices": ["lamb", "enemies", "father", "mother"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000167716.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547586, "question_id": "My7CnFDQP3ejjAv68bDeWq", "question": "The color of the flowers is closest to the color of what food?", "choices": ["banana", "lettuce", "blueberry", "tomato"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547586.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349844, "question_id": "Mzi2PYvLPPg2xHcVLRL3S7", "question": "What time of transportation is absent in the signs or in the background?", "choices": ["driving", "walking", "boating", "cycling"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000349844.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 323867, "question_id": "N2eaqvQ7Q6gY4iENAsZRC2", "question": "For what purpose does the animal here raise it's tail?", "choices": ["curiousity", "nervousness", "defecation", "ire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000323867.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535319, "question_id": "N3a6PcWEgPu2ZXPu5WudkF", "question": "What is the man most likely doing in the room?", "choices": ["brushing teeth", "taking shower", "washing hands", "combing hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535319.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81671, "question_id": "N3pFpSoDKk7RCRk5oTgbiR", "question": "When device is likely nearby but invisible?", "choices": ["television", "computer", "speaker", "cellphone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081671.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458937, "question_id": "N4xdbKGB5c6cWdPusRYvG4", "question": "These types of laptops were geared toward what type of consumer?", "choices": ["artists", "dancers", "business people", "musicians"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495865, "question_id": "N5sVmHqXrsooukeSbEQxj9", "question": "What animal eats the green things regularly?", "choices": ["cows", "dogs", "cats", "snakes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495865.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 340768, "question_id": "N6GkmN4HqKksRoeXZNgoZu", "question": "What class accommodations is this train car?", "choices": ["executive", "poor", "coach", "business"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000340768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 33226, "question_id": "N7DgZivpmmMhJBe7iwbPgE", "question": "What type of outfit is the person wearing?", "choices": ["tuxedo", "wet suit", "bikini", "baseball uniform"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000033226.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478208, "question_id": "N8xtpYmkKHPPo5ux8nykR9", "question": "What is used for keeping warm in this picture?", "choices": ["air", "blanket", "water", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447498, "question_id": "N9GA8sR83ezegRxcALAGc8", "question": "What organization supports the message on the sticker that's on the stop sign?", "choices": ["wwp", "wwf", "red cross", "peta"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 140075, "question_id": "N9Rm8TZpvb73g7SWnVV4Nd", "question": "What is the zebra doing?", "choices": ["seeking food", "mating", "fighting", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000140075.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190070, "question_id": "N9WtKhrbeQ5yrDVJrpKj3r", "question": "Why is the person wearing things on their knees?", "choices": ["observation", "protection", "control", "fashion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89184, "question_id": "N9iNzuBx9LWmA5eGuJtNPm", "question": "What is between the smiley face and the green base?", "choices": ["bandage", "spring", "cat", "string"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000089184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65825, "question_id": "NAAEAn8DkAijEDEb7GbbsM", "question": "What weather is the woman prepared for?", "choices": ["snow", "heat", "rain", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065825.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9793, "question_id": "NAHDpV9QU3YwbM6Jv98Dmf", "question": "What is the net protecting the bed from?", "choices": ["smoke", "bugs", "light", "thieves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009793.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118684, "question_id": "NBHJEBmnG5fLKbeH87JZZ2", "question": "How were these two hot dogs cooked?", "choices": ["baked", "grilled", "pan seared", "microwaved"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118684.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440961, "question_id": "NBjvN2WxTRgkxRiyi2XFSv", "question": "One can turn left to go to what type of institution?", "choices": ["police station", "city hall", "prison", "college"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440961.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34911, "question_id": "NF8oQ6U8JU3TRkf7o59ypb", "question": "How did this skier get to this location?", "choices": ["snowmobile", "uber", "skied", "chair lift"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000034911.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258604, "question_id": "NGjxP29n2wqy4KWgMwCKKC", "question": "What kind of court is the guy playing on?", "choices": ["gravel", "turf", "clay", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000258604.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170823, "question_id": "NGo5bD5Y9XfJzJM4vNwPXL", "question": "What is the bear supposed to be doing?", "choices": ["resting", "writing email", "writing neovel", "playing solitaire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170823.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 236646, "question_id": "NHRGkEXcheeUwYGSwAN7mL", "question": "What will allow it to grip the frisbee?", "choices": ["teeth", "paws", "tail", "ears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000236646.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77457, "question_id": "NLEezee64BPxUR4EBNvGic", "question": "What probably knocked down that tree?", "choices": ["wind", "chainsaw", "snow", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077457.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341282, "question_id": "NLknBuc3xvxGHrRY8XpiQ8", "question": "What can be accessed by the fixture by the road?", "choices": ["alarm", "water", "phone", "assistance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000341282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533205, "question_id": "NN895oPYtnycYtuTvqqZjx", "question": "What is he ready to do?", "choices": ["dribble", "swing", "sprint", "dunk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533205.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491975, "question_id": "NNcZrKThsboFeWQEUwDZxS", "question": "What brand made the man's red shirt?", "choices": ["adidas", "nike", "reebok", "puma"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000491975.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125088, "question_id": "NNuVAmydTFmXEh8jBzXxDn", "question": "What type of transportation is shown?", "choices": ["air", "road", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125088.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449074, "question_id": "NPGiaYY5dq6h6xswiqJhdD", "question": "What kind of material is the building made out of?", "choices": ["glass", "iron", "wood", "bricks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449074.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136090, "question_id": "NPszn8REzv58nVG4TwsD7L", "question": "What is the cat doing on the beige colored furniture?", "choices": ["playing", "bathing", "eating", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136090.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263097, "question_id": "NQHvVFbRmQi87jijDaBRe7", "question": "Where are these items usually served?", "choices": ["french restaurant", "barbecue", "mexican restaurant", "italian restaurant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263097.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189202, "question_id": "NS8wCytpNQxHZmb2o9CKvE", "question": "What was used to create the art?", "choices": ["watercolor paint", "oil paint", "tempura paint", "spray paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189202.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252230, "question_id": "NScRovgawxA7zaoPd7rAM7", "question": "What actress has the same color hair as the woman?", "choices": ["rebecca romijn", "sissy spacek", "idris elba", "jessica simpson"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000252230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281186, "question_id": "NUZdBqEkxqixnbHC8sWpqE", "question": "The yellow item is often added to what?", "choices": ["pizza", "fish", "cereal", "steak"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281186.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 312057, "question_id": "NVFx74sKTT9meYkGb5phgC", "question": "What would a sign like this traditionally say?", "choices": ["no loitering", "no parking", "grand opening", "no soliciting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000312057.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364170, "question_id": "NVGQshN8S3NQAUzjqueCoQ", "question": "What is the character in the green vest holding?", "choices": ["tray", "broom", "mop", "pumpkin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000364170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56697, "question_id": "NVmE5tNMT9hrzukvpYeDx2", "question": "What type of floatation device shown in image?", "choices": ["two", "four", "three", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21616, "question_id": "NXSdHqCraNyqsXvZ7758md", "question": "What are the group of tattoos on the woman's arm called?", "choices": ["sleeve tattoos", "blackwork", "classic americana", "realism"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021616.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147227, "question_id": "NYZ3S3yqGtVbTjLkEJGa4q", "question": "What kind of signage is behind the giraffe?", "choices": ["advertisement", "story", "warning", "psa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147227.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367781, "question_id": "NYgJ42KxbEsKMbAhAiqx6S", "question": "What black item's silhouette is visible here?", "choices": ["paper plane", "bird", "ufo", "plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527900, "question_id": "NZE4B8wQZ24qWtZt6P5wS4", "question": "What word could describe the toilet paper itself?", "choices": ["monocolor", "surplus", "green", "deficit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527900.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546051, "question_id": "NaiMHpX5B3Dzo6EdnRQZeu", "question": "What would first cause the giraffe to lower it's head and stare forward?", "choices": ["curiosity", "fear", "hunger", "joy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146287, "question_id": "Naq34in29vMNXPJfNDu4Dg", "question": "What feature of this animal is the most precious?", "choices": ["skin", "tusk", "nose", "eyes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146287.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362874, "question_id": "Nbgw6hFhZBznYXpi2NC4tj", "question": "What material are the boxes on the left made of?", "choices": ["wood", "cardboard", "plastic", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362874.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543415, "question_id": "Nc3yJwd6RwYjvog3qZB5yA", "question": "This item served it's owner once by allowing them to do what?", "choices": ["make ice", "microwave popcorn", "watch tv", "pick corn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466127, "question_id": "NdiRXjVNcEgdqyp9SP9PiJ", "question": "What kind of a shirt is the man in green wearing?", "choices": ["henley", "polo", "tee", "flannel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172400, "question_id": "Nf3PbG7DW6YyNMbrSdHfrQ", "question": "What is the item the man is holding currently protecting against?", "choices": ["sun", "bullets", "karate kicks", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172400.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 331948, "question_id": "NfqLuNTkJth5S6KwaZ69Tu", "question": "Which object on the table would provide the most danger if touched directly?", "choices": ["candle", "cutting board", "remote", "phone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000331948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 448071, "question_id": "NgcJzuiPuriWypW6XMaExd", "question": "This car will transport one to where?", "choices": ["prom", "beach", "prison", "market"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000448071.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 662, "question_id": "NieAHbzWjv9uUNyPPNGhpZ", "question": "Where is this airline headquartered?", "choices": ["new york", "las vegas", "dallas", "baton rouge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000662.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528304, "question_id": "NioZQioSGhEniEHRhVMAbu", "question": "What travel site is a sponsor of the baseball field?", "choices": ["yelp", "priceline", "expedia", "julius"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542026, "question_id": "NkF7x7gV5iSFkJktNVAGCU", "question": "What are these animals closely related to?", "choices": ["cows", "wolves", "tigers", "horses"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542026.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441760, "question_id": "NkLMnrPwWNXuHFSbsJAHps", "question": "What type of terrain is shown behind the animals?", "choices": ["forest", "mountain", "beach", "valley"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 220294, "question_id": "Nn9X6824VucUwMGrMP2ibS", "question": "What is on the grass?", "choices": ["weapons", "trains", "animals", "toy soldiers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000220294.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161822, "question_id": "NoAGBBbUV4kCBERJZtnPPw", "question": "What type of transportation is shown?", "choices": ["air", "rail", "water", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161822.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 310335, "question_id": "NrE4htA2aTuEGuJ8jEJhD4", "question": "There would be danger if what came down the other track?", "choices": ["leaf", "rain", "train", "animal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000310335.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393161, "question_id": "NrXX5hvJq8eYojqjZ9Hpzj", "question": "What major dog group does this dog belong to?", "choices": ["terrier", "toy", "hound", "work"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 248877, "question_id": "Nrckmbwpii6jQPcmD9Aw4s", "question": "This child is probably in what location?", "choices": ["daycare", "school", "kindergarten", "home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000248877.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512253, "question_id": "NrtwmAMTLpZ8cmNz6C9MaR", "question": "The equipment in this person's hand is called a?", "choices": ["racket", "club", "broom", "bat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512253.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427684, "question_id": "Ns6sNWe6yzBxfJkXcGnBaK", "question": "The namesake of this university made his fortune in what?", "choices": ["railroad", "lumber", "oil", "coal"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000427684.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341962, "question_id": "Nu8jSrYwRawZWh8z6WCVz4", "question": "What is this kind of bus called?", "choices": ["double decker", "coach", "mini", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000341962.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84882, "question_id": "NvYrYjYsEKQhiuj6sYQouE", "question": "What character is the same color as the jacket?", "choices": ["daffy duck", "snoopy", "miss piggy", "kermit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084882.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 13971, "question_id": "Nw8mLegEwbjGasKvWgLgP7", "question": "What is the genus of this striped animal?", "choices": ["homo", "equus", "panthera", "canus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000013971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435027, "question_id": "Nx7rzxCmm2RZ3nSE2ngCRe", "question": "What activity is the person in the foreground performing?", "choices": ["surfing", "roller-skating", "diving", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435027.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399953, "question_id": "NxCD7rVfFZPeDPwmu63dao", "question": "What type of starch is shown?", "choices": ["vegetables", "cheese", "pizza crust", "sauce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399953.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184070, "question_id": "NxKEwSUxpdiFwQ3cDLcouS", "question": "Whats another name for the object on the chair?", "choices": ["pony", "stallion", "figurine", "pokemon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95577, "question_id": "NyQgcH7Fm9pupxjsuLpzUR", "question": "This boy likely idolizes what athlete?", "choices": ["kapil dev", "john part", "tony hawk", "earl anthony"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095577.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 48085, "question_id": "NyiYgYT5JjakturuKqiGXM", "question": "What kind of tower is shown?", "choices": ["lattice", "water", "cell", "clock"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000048085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520954, "question_id": "NzQacY5GXPdhXAFbheWPaV", "question": "What type of highway does the blue and red sign indicate it is up ahead?", "choices": ["interstate", "state", "county", "fast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520954.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441316, "question_id": "NzcfSN2djQhiSss5ejhGeB", "question": "Which food on the plate is known for being good for your eyes?", "choices": ["potatoes", "asparagus", "tortilla", "carrots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441316.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76702, "question_id": "P2425n357K3jLMy8vTMyZj", "question": "What feature may draw the cat to rest here?", "choices": ["hardness", "moisture", "loud noise", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076702.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 239209, "question_id": "P2T5yMBWef4AFcyD529b5p", "question": "What activity are these animals currently doing?", "choices": ["attacking", "grazing", "stalking", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000239209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81195, "question_id": "P2zMqibRrDdDaCzP2kBinN", "question": "Which food is shaped differently than normal?", "choices": ["potato", "carrot", "peas", "broccoli"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081195.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516869, "question_id": "P4LbpT6qNFZYM9EeG7qjq8", "question": "What are the horses doing in the grassy area?", "choices": ["mating", "eating", "attacking", "running"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516869.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 530442, "question_id": "P5hvPZv2qa7KjWT2p25nVT", "question": "The side of the bus is advertising that what service is available?", "choices": ["breakfast", "massage", "telephone", "internet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000530442.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35231, "question_id": "P6aPN6wpwE2oummPCZovfN", "question": "What is this animal doing?", "choices": ["defecating", "nothing", "fleeing", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035231.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 33024, "question_id": "P7PfpLrdRBj2WUtk85gLLk", "question": "What are they doing together in the water?", "choices": ["washing", "eating", "surfing", "kissing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000033024.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191087, "question_id": "P7daUX45dNXd8NoZvH4w42", "question": "What is between the broccoli?", "choices": ["potato", "meat", "radish", "eggs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 67343, "question_id": "P92uax7nPdK336NsaRFUQS", "question": "How is this food prepared?", "choices": ["boiled", "fried", "grilled", "frozen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000067343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579263, "question_id": "P9rLcKnRpxBQotVT7Coz9k", "question": "The item seen here was prepared where?", "choices": ["chandlery", "bakery", "butcher", "deli"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169184, "question_id": "PA237KDqxnVYmAvDhszguo", "question": "What is this container used for?", "choices": ["travel", "food", "drink", "trash"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000169184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371291, "question_id": "PBDV3UhZm3iaj4nHLijyKx", "question": "Where are the cat located?", "choices": ["outside window", "inside house", "on roof", "on floor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371291.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221046, "question_id": "PCTASvn4GUxTiy82VVVYzL", "question": "The black box-like structure beneath the traffic light likely serves what purpose?", "choices": ["pedestrian crossing", "stop sign", "railroad crossing", "warning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44155, "question_id": "PCsJiACJGJJFt9P5qq7tFe", "question": "What does this engraving seem to depict?", "choices": ["moon", "sun", "earth", "mars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044155.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93134, "question_id": "PD44AQ99GePdL8CM89idGN", "question": "What is the black cat on top of?", "choices": ["chair", "box", "grass", "shelf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093134.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91962, "question_id": "PDtGLaiFmEv2FrdFTWcSuh", "question": "How many city clocks do you see?", "choices": ["five", "three", "one", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091962.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436390, "question_id": "PE6SRn3oNKRYnC86CcvxNZ", "question": "What is okay for the baby to have in its mouth?", "choices": ["shoes", "fingers", "hot dog", "chips"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411169, "question_id": "PEdamxwdgKHGiuMpLGCW2S", "question": "How is this type of plane called?", "choices": ["spinner plane", "propeller plane", "rotator", "fan plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411169.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214895, "question_id": "PLDsTQdPu8giMwXrADzA6u", "question": "What is this type of transportation called?", "choices": ["water", "rail", "air", "land"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214895.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268118, "question_id": "PLXDdvePCjU2u9CQkqA6fE", "question": "What game would the dog be playing with the object in his mouth?", "choices": ["catch", "spar", "tug", "find"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000268118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214572, "question_id": "PLrRSy2WYM5D8V75A4pjwX", "question": "What sound does the animal make?", "choices": ["woof", "meow", "moo", "neigh"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214572.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49806, "question_id": "PNVrFKxFo4fZtS3pDHRows", "question": "What time of people are allowed to live in Amica residences?", "choices": ["children", "seniors", "young", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325825, "question_id": "PNXzRZxQWY6USHpHTbReXK", "question": "When was Samsung founded?", "choices": ["1983", "1930", "1938", "1944"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000325825.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298968, "question_id": "PPMzcjoRUKW8a3CjXso8xT", "question": "What is the horse doing?", "choices": ["resting", "racing", "feeding", "pausing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411508, "question_id": "PQ3gJTYzQpSbNJCm5DVYwt", "question": "What professional can make its fur look better?", "choices": ["veterinarian", "groomer", "breeder", "taxidermist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411508.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320638, "question_id": "PQh8fgbfYwBMSbPbRD34Sc", "question": "What is in the bottle by Fiji?", "choices": ["soda pop", "water", "beer", "coffee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99357, "question_id": "PQjNBiefzDeA7iVuHi43FX", "question": "What is the metal piece beside the faucet for?", "choices": ["shampoo", "lotion", "soap", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099357.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 42487, "question_id": "PQnqGbV4HEyYrkbB9KgfYd", "question": "What does the cat probably hope the man will do?", "choices": ["wash it", "clean room", "pet it", "sing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000042487.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318695, "question_id": "PQshzBEBC7GknoWdS3df5B", "question": "Who probably once owned the elephant?", "choices": ["child", "hamster", "adult", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318695.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 321246, "question_id": "PRfdX4ZMLfHc4CJcT2KZ4j", "question": "Why is the bird clinging to the branch sideways?", "choices": ["is stuck", "evading predators", "is lost", "is tired"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000321246.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26511, "question_id": "PSc4QLR8AwKNfpREm3fAoe", "question": "What type of transportation is shown?", "choices": ["air", "water", "land", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83760, "question_id": "PSkouNYcsDzs6NKWWUqnxF", "question": "These cooked objects are covered in what substance?", "choices": ["ketchup", "pasta", "cheese", "tofu"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450622, "question_id": "PT2Sma9Xw6sruwaGvHhTr8", "question": "How many picture frames can you see?", "choices": ["one", "three", "two", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000450622.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464539, "question_id": "PUYH8aaf5ypxBbfbZ4FnZc", "question": "What body part would most likely be injured first if he missed the skateboard?", "choices": ["head", "back", "ankle", "wrist"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464539.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351455, "question_id": "PVAjEWL8J4L8yi5XG7pZBB", "question": "How many types of animals pictured above are herbivores?", "choices": ["all", "none", "one", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351455.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74144, "question_id": "PWsTJAegnCycFqBF2qLU6j", "question": "What is she trying to do?", "choices": ["flip", "descend", "roll", "ascend"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519914, "question_id": "PXDZJf7b6XCz83WBnipNBV", "question": "What country is this intersection located in?", "choices": ["france", "england", "canada", "united states"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288681, "question_id": "PXNydLBWXSrb9PtLrZJ9kP", "question": "What does this object use to wash away waste?", "choices": ["air", "water", "gravity", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127963, "question_id": "PXUptTwJTxgpGvYZXQdXnz", "question": "How would he close his sweater?", "choices": ["velcro", "buttons", "zipper", "laces"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438951, "question_id": "PXjQZMqSZe9xZc99jyWUzp", "question": "What is the name of the process that produces this animal?", "choices": ["aneurysms", "mitosis", "metabolism", "metamorphosis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438951.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 487969, "question_id": "PY38SmaSYeibR8Piz35yxf", "question": "What move is the skateboarder performing?", "choices": ["grind", "720", "kickflip", "superman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000487969.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 238785, "question_id": "PYqx7vF3ykZcNrkn2grHBf", "question": "Where might the elephant have been recently?", "choices": ["ocean", "watering hole", "snow", "lake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000238785.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378180, "question_id": "PZM62PdMk7xJuHuhkqRGQ9", "question": "The animal has how many protrusions below its eyes?", "choices": ["seven", "two", "eight", "nine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378180.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485208, "question_id": "PZcajMCDdZV67YaGUaVHuF", "question": "Why do these animals have markings?", "choices": ["branding", "birthmark", "identification", "decoration"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000485208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545077, "question_id": "PappSenpbRee8Nvo9sWNvP", "question": "What do these animals use to move?", "choices": ["eight legs", "opposable thumbs", "wings", "shell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545077.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227764, "question_id": "PcFUjknAKFHMEQMpzigxmY", "question": "Why has this person covered their head?", "choices": ["warmth", "uniform", "protection", "religion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000227764.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183501, "question_id": "PcHUnoEif8HuTpRwk2pYdu", "question": "London's Houses of Parliament also called Palace of Westminster is it true?", "choices": ["true", "maybe", "none", "false"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183501.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161299, "question_id": "Pducsf39DTWMQCHG6jNrj4", "question": "What is the weather in the photo?", "choices": ["cloudy", "sunny", "rain", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161299.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460217, "question_id": "PedDTKkxx4QK4RNkhWt7eE", "question": "What food item matches the most plentiful color on the sign?", "choices": ["blueberry", "lemon", "lime", "cherry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460217.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223802, "question_id": "PejmAuMNZNzpD9tsZG3LSK", "question": "What in the picture is capable of walking?", "choices": ["white", "small black", "large black", "brown"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342267, "question_id": "PfBkmx7GKtxxaGGB9oihXx", "question": "How is this vehicle on the tracks powered?", "choices": ["fire", "overhead lines", "underground mines", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000342267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458165, "question_id": "PfpNrACJx4zMef9z5UgYfv", "question": "The building behind the clock is most likely zoned for what kind of use?", "choices": ["commercial", "religious", "residential", "industrial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458165.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51670, "question_id": "PguK7Ubh9T7oq8N7TWin8a", "question": "In which type building would you find a bathroom like this?", "choices": ["motel", "mall", "expensive condo", "gas station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051670.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377198, "question_id": "PhktTcjz76YVUT2XLMfE8e", "question": "Why are this person's hands on the ground?", "choices": ["trick", "exercising", "resting", "fell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161376, "question_id": "PhmNaQeMaLficHrbfH4c57", "question": "This person probably lacks what fear?", "choices": ["heights", "spiders", "enclosed spaces", "public speaking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161376.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85465, "question_id": "PhzCykMXuGFBQzRzjNCU2q", "question": "What is in the measuring cup to the right?", "choices": ["raisins", "acorns", "sugar", "salt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085465.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 389958, "question_id": "Pjssi2W79EMSoYqxNZeQSC", "question": "What does the woman have on?", "choices": ["glove", "boxing gloves", "bracelet", "hat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000389958.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500397, "question_id": "PkHJoCVt3ptTpJvZoLep4a", "question": "What is the animal using to grab on to the seat?", "choices": ["quills", "trunk", "mouth", "claws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500397.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21729, "question_id": "PmHsgSkSdDaFhmXrXtcsaB", "question": "What is the person wearing on his foot?", "choices": ["flip flops", "sneakers", "shoes", "boots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021729.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69636, "question_id": "PnmccdkfoBxYFWSzLQCmtZ", "question": "What is the man performing trick wise?", "choices": ["900", "grind", "kickflip", "ollie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000069636.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 571892, "question_id": "Pnwx3Vdjvwc7PM8DyjaBN7", "question": "What is he swinging?", "choices": ["racquet", "paddle", "bat", "club"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000571892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577784, "question_id": "PpByjeVTZ9PJ9zmNp5WSJ7", "question": "What kind of surface is the plane on?", "choices": ["highway", "tarmac", "parking lot", "street"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424704, "question_id": "Pq3rbdGNe43QtYNBSNZXAZ", "question": "Why is this man covering his head?", "choices": ["warmth", "uniform", "germs", "sun"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000424704.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 282435, "question_id": "Pqkmsp9bQG5AediWqPJ3wr", "question": "Where is the yellow train about to stop?", "choices": ["station", "themepark", "hotel", "farmhouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000282435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4158, "question_id": "PrQoboqJ2vPhHZESDqxmKd", "question": "What will the birds seek in the water here?", "choices": ["reflective surfaces", "nothing", "food", "nesting material"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004158.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4398, "question_id": "PtvACKj2Tk38TZyoRV3Fwx", "question": "What color will the wheels most likely become as the rider uses the skateboard more without cleaning it?", "choices": ["blue", "clear", "green", "darker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004398.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135085, "question_id": "PuFfCa7RXWgzSS5qQvSsEw", "question": "What is the person holding up?", "choices": ["their arm", "sign", "baby", "box"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 454480, "question_id": "Px87u5gf5RqwZs6nUdWQM3", "question": "Which breed of cat has absence of fur?", "choices": ["munchkin", "persian", "ragdoll", "sphynx"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000454480.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433687, "question_id": "PyBPutkUbaTufKDMkavvNp", "question": "The people with the cycles here likely hired the boat owners here to do what?", "choices": ["hide them", "repair bikes", "ferry across", "serve lunch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433687.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352204, "question_id": "PyzP4oRWvzUYn78dUdFnnx", "question": "How is the robot able to move around?", "choices": ["using wheels", "using tracks", "using fans", "using legs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352204.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473120, "question_id": "PzUv4KsVhihkbwbw9JfBzj", "question": "What toppings are on the pizza?", "choices": ["eggs", "broccoli", "mushroom", "peppers"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000473120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180913, "question_id": "Q3AheyrBUMb3aAXedutd9h", "question": "What is a current danger to this animal?", "choices": ["heat exhaustion", "stampede", "hunters", "snake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180913.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452425, "question_id": "Q43273KxBhYQ6VTNV94EwZ", "question": "Where is this bird located?", "choices": ["wild", "indoors", "outdoors", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273796, "question_id": "Q4tJiSXdeTmsX7PHrJDfCD", "question": "Vitamin A is rich in which vegetable?", "choices": ["beet", "cabbage", "bean", "carrot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273796.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222514, "question_id": "Q5ggyJdW8yjUoJX8qBiJmi", "question": "The yellow item protects against what?", "choices": ["fire", "monkeys", "vampires", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400282, "question_id": "Q6fGijhtNqkrzyqMsvPc97", "question": "The boy is probably trying to control the activity on what device?", "choices": ["television", "tablet", "laptop", "phone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 509425, "question_id": "Q7XLTsqcgSuokKgSknm2H3", "question": "What disaster would be the biggest fear in this area?", "choices": ["tornado", "hurricane", "avalanche", "tsunami"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000509425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209790, "question_id": "Q7mgpYbf5tK6upCy3T4sVe", "question": "Persons wanting to cross the street here must do what now?", "choices": ["turn back", "wait", "cross", "run"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286408, "question_id": "QAUcGPmLZLbQbdtMg4Jd6U", "question": "What is the boy doing?", "choices": ["frowning", "smiling", "crying", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286408.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390302, "question_id": "QAZKJdzA6poy5EzMqcQfc7", "question": "What powers the boat that is next to the nearest dock?", "choices": ["solar", "foot pedal", "wind", "oars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390302.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575593, "question_id": "QAuXicJEhv9F3vNPYJuPyx", "question": "What kind of transportation is available?", "choices": ["rail", "road", "water", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575593.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414263, "question_id": "QAzQTPnpm6EuJGzRB27CZD", "question": "Who tied this horse here?", "choices": ["horse", "sherriff", "bandits", "it's rider"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453669, "question_id": "QBjGPpqkDQVeggs7ATtJu9", "question": "What is the person on?", "choices": ["board", "boat", "chair", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453669.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 55944, "question_id": "QC8M3ESijFHDHEmXXxSpwH", "question": "The bear is sitting in a compartment designed to hold what?", "choices": ["donuts", "garbage", "license", "cups"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000055944.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529839, "question_id": "QEgWKTKFAAvZEH4DZ4X8Q5", "question": "Which liquid poses the most immediate damage possibility here?", "choices": ["water", "coffee", "none", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529839.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182842, "question_id": "QGQRCHbv8e7P9AFTEsGErs", "question": "What are the girls huddling under?", "choices": ["palm tree", "cow", "umbrella", "igloo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000182842.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 433692, "question_id": "QHNaCDigGFoc6XT3XzEBmS", "question": "What is the desk's top made from?", "choices": ["glass", "wood", "steel", "brick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000433692.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381686, "question_id": "QJBYGw4oJqMJqecfXfFHqG", "question": "The clock is shaped like an instrument that which person was known for playing?", "choices": ["hillel slovak", "leon theremin", "herbie mann", "john bonham"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000381686.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257140, "question_id": "QLRvUXwhw6odj3dxrF7MDW", "question": "Why does the animal like sleeping there?", "choices": ["wolves nearby", "food source", "cold snow", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257140.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346640, "question_id": "QMQZ7YGDPWmxukQHE4E8r5", "question": "The person who made this sign is probably against what?", "choices": ["driving", "mining", "drilling", "vaccines"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346640.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 233333, "question_id": "QNVYpvwwRCXWDUZMJ5cjRX", "question": "What type of breakfast food is shown?", "choices": ["roll", "muffin", "yogurt", "bagel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000233333.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 4163, "question_id": "QQXkNZuvWYXBsv8QZGAE7p", "question": "In which country is this cell phone being operated?", "choices": ["germany", "france", "england", "netherlands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000004163.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483696, "question_id": "QRJZARyVRUMoxEtD8Pnddq", "question": "What is forbidden when near the intersection?", "choices": ["turning", "blocking", "changing lanes", "yielding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483696.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439417, "question_id": "QRTJygkz6JoaQSVHfaxSa2", "question": "What is the first baseman preparing to do?", "choices": ["steal base", "home run", "defend", "hit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439417.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137393, "question_id": "QS6BvZNecPrPPda9wk3uM3", "question": "What device is on the wall?", "choices": ["television", "guitar", "headphones", "lamp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137393.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315888, "question_id": "QSbmuegWbMWU3bhwwfikUx", "question": "The items like the one on the decal are usually brought out when?", "choices": ["christmas", "easter", "ramadan", "passover"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315888.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556182, "question_id": "QSsN3ij45b232rhaYBanqJ", "question": "What is this man trying to do?", "choices": ["rest", "eat", "play", "clean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556182.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285209, "question_id": "QTjN7mGV6jhccER5vBAisF", "question": "What country is mentioned?", "choices": ["italy", "england", "germany", "poland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 425708, "question_id": "QUaJmMfkhhwLBeuZdUXAv5", "question": "What type citation might the car seen here find on it's windshield soon?", "choices": ["good neighbor", "volunteer", "parking", "good parking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000425708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482711, "question_id": "QWaQnj7FkH2dibhjpkvJb9", "question": "What location related to transportation is located here?", "choices": ["parking garage", "train station", "parking lot", "bus stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482711.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83249, "question_id": "QX26LzHMXpVJS698NFvm69", "question": "Where is this meal likely being made?", "choices": ["park", "backyard", "campsite", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417464, "question_id": "QXNiGj9ZpCLPXXphkS9qbR", "question": "What are the cables for?", "choices": ["construction", "ski lift", "power", "internet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 482973, "question_id": "QXeZyMCBAXJAaQpndncCDJ", "question": "What damaging thing might get inside the computer here?", "choices": ["liquid", "sand", "ice", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000482973.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446092, "question_id": "Qbz2MU9bD7AnSSqL4PtLLV", "question": "What flavor is the drink?", "choices": ["grape", "root beer", "cherry", "lemon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446092.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252650, "question_id": "QcZZMKfahqhgxCvCFSDiwo", "question": "Why is the building red?", "choices": ["extra paint", "barn", "find easily", "for sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252650.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8985, "question_id": "QeMGW6Q5wsNQrx7qL243RV", "question": "What does this lady hope for most here?", "choices": ["calm seas", "sharks", "swimmers", "big waves"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000008985.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198454, "question_id": "QiE5usWWx3SAR4at7JuXNu", "question": "What is forbidden between four and six pm in this location?", "choices": ["stopping", "right turns", "parking", "u turns"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198454.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561778, "question_id": "QigaKjWDE6mjrXz96Kxnx4", "question": "How many people can this plane carry?", "choices": ["two", "three", "one", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518111, "question_id": "QmKbuzpCJ6H2FyyFS6EX9M", "question": "What types of bags is this woman holding?", "choices": ["plastic", "leather", "nylon", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164549, "question_id": "QnJGPp8g2cjYiDFmjezTNy", "question": "What is the large tanker truck used to transport?", "choices": ["gas", "water", "milk", "propane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164549.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449930, "question_id": "Qnq536WsZCDmE9bzhueik2", "question": "How many airlines are represented by the planes?", "choices": ["seven", "three", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449930.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172424, "question_id": "Qoce6CKTrx3DU8MwHPZhvf", "question": "What is coming from the device in her hand?", "choices": ["air", "water", "videos", "music"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172424.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346194, "question_id": "QpUC4hJEw4WbTA3c39y6HR", "question": "What is this appliance used for?", "choices": ["cooking", "cooling", "cleaning", "calling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159604, "question_id": "QspqieaUXLYUrBrmmP7avt", "question": "What animals are seen walking through the field?", "choices": ["horse", "camel", "cow", "pig"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159604.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232174, "question_id": "Qtq8sTcifmQ8g4FDkvCBNy", "question": "What type of area is shown?", "choices": ["rural", "deserted", "urban", "restricted"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232174.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391581, "question_id": "QufVTZazXXrmuNw5cckZTZ", "question": "What plant object is taller than the giraffe here?", "choices": ["grass", "bare tree", "bush", "palm tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404287, "question_id": "Qv7PAebonw9Smukavxssw6", "question": "What did the skateboarder have to do with his back foot to get so high in the air?", "choices": ["swing left", "kick right", "push down", "lift up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404287.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350036, "question_id": "QwNu6snM28jBxqvKidFdGA", "question": "What type of company advertised on the double decker bus?", "choices": ["clothing", "restaurant", "phone", "wallpaper paste"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563867, "question_id": "Qx2tosohsfDvyQSU2FbbtX", "question": "What type of transportation is shown?", "choices": ["rail", "air", "road", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563867.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387657, "question_id": "Qxxotcswo6L6Hou2UbaD3L", "question": "What can both devices he is holding do?", "choices": ["make calls", "measure speed", "take photos", "tell time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326026, "question_id": "Qz5qSsNCzrJZ4qGMovGTb8", "question": "What is the paddle in his hands being used for?", "choices": ["steering", "balance", "speed", "hitting sharks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326026.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318155, "question_id": "R2d7x3qMpkbzMystVEnASS", "question": "What type of sink is shown?", "choices": ["laundry", "hospital", "bathroom", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318155.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431736, "question_id": "R2pMTqKzsXo6dN5vYiM2UV", "question": "What kind of clothing is the person wearing?", "choices": ["semi-casual", "swimming costume", "casual", "official"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431736.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427358, "question_id": "R4pKjnHBDgAgNYE7cTtnTf", "question": "What was the name of this airline before 1960?", "choices": ["golden airways", "shamrock air", "aerlinte eireann", "irish airlines"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224498, "question_id": "R5vCu7aNAVRrfL6nRcyZQZ", "question": "How many urinals are mounted to the walls?", "choices": ["none", "two", "one", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115857, "question_id": "R7LbLBCCjDFxETsgP2c9xf", "question": "What shape is the clock?", "choices": ["sphere", "cube", "hexagon", "pyramid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115857.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86903, "question_id": "R7RLMFV9mG2UWjqeFx4UwM", "question": "The pattern on the vest is known as what?", "choices": ["argyle", "checkered", "paisley", "stripes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086903.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254980, "question_id": "R7u3biCEaWN6WDk7vyd8h3", "question": "What is the raw material of toilet tissue paper?", "choices": ["plastic", "fiber", "zinc", "paper pulp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254980.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184083, "question_id": "RA24M9iT9vnEXnuAQPb55T", "question": "What is being shown on the various green signs?", "choices": ["home addresses", "store names", "street names", "city names"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184083.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159295, "question_id": "RAEb2F3K49sWfmaF8wNLsa", "question": "If you wanted to get clean which is your option?", "choices": ["shower", "bath", "hot tub", "pool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159295.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17014, "question_id": "RAWQefk7q3Eo7kVn2LYozu", "question": "How will the largest vehicle get to its next destination?", "choices": ["run", "sail", "drive", "fly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017014.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314550, "question_id": "RBzDiYusN3jRNJoBjUfmfD", "question": "What is needed to move this board?", "choices": ["remote", "wind", "battery", "wave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167231, "question_id": "REu5pTXj2hezrnLpjjD2WZ", "question": "How many slices of pizza is there?", "choices": ["six", "two", "four", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000167231.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195123, "question_id": "REuS8mDHE4M6CLRKihH4a3", "question": "When may people seen here cross?", "choices": ["later", "now", "anytime", "never"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195123.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413323, "question_id": "RFdY4BZgL6pKQsBXHW9jtX", "question": "What type of power comes through that cord?", "choices": ["electrical", "wind", "thermal", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413323.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362961, "question_id": "RHc8yPsjjitiZuEQv6MKyp", "question": "What type of toothbrushes are outside of the cup?", "choices": ["regular", "smart", "electric", "fuel powered"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362961.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215425, "question_id": "RKVTpgoZi5nXs5fGcvwo5Q", "question": "What type of flowers are seen in the grass?", "choices": ["tulip", "rose", "violet", "dandelion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000215425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 28363, "question_id": "RLgtoeDazfYLGzhsxKKVZs", "question": "What is in the tan case?", "choices": ["music device", "instrument", "art supplies", "makeup"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000028363.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 222544, "question_id": "RPJLYBpfLSZdEATF4xjjsH", "question": "What most likely formed the land in the elephant enclosure?", "choices": ["animals", "weather", "water", "heavy equipment"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000222544.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343814, "question_id": "RQGUnhRoF5vcrTAQxfASmM", "question": "What is required for this activity?", "choices": ["skates", "shoes", "board", "skis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000343814.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 499022, "question_id": "RQiBHWMdpjpz9VsJRNBs6Q", "question": "What number needs to be added to the black and white signs so that every number appears twice?", "choices": ["80", "65", "52", "70"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000499022.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415729, "question_id": "RRFcRnxsLjNemyvUpHfMna", "question": "Why does he have his head covered?", "choices": ["protection", "fashion", "religion", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415729.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556595, "question_id": "RRiyGDj5jKuozFbgkWF4Lg", "question": "What is in the foreground?", "choices": ["pole", "meter", "tree", "flower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556595.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 236103, "question_id": "RTBy6QkXHX6FancyqNCLFT", "question": "How many species at least likely live in this living space together?", "choices": ["none", "three", "two", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000236103.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563566, "question_id": "RTPQ7HgQAxs6CXUDAWFh6o", "question": "What sort of material are the wheels of this conveyance manufactured from?", "choices": ["plastic", "wood", "rubber", "metal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563566.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519721, "question_id": "RTbiJHB37uKb7jCbbsjxEq", "question": "In what setting is this animal?", "choices": ["farm", "wilderness", "zoo", "park"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519721.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54048, "question_id": "RW4nbzCNoTEXjwZUv7dQnV", "question": "What is the boy using to hold the ball he hit?", "choices": ["block", "post", "tee", "hand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054048.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 568399, "question_id": "RXEemR9gZriPq4j2q4iADs", "question": "What type of animals are shown?", "choices": ["wild", "domestic", "water", "aquatic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000568399.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87359, "question_id": "RZXUdhQV7FycwoLqgRKYpp", "question": "What is the longest item here called?", "choices": ["light fixture", "space bar", "axle", "pole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087359.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68990, "question_id": "RamShgkwcxzAEFVGWALibP", "question": "How many different bears are probably in the scene?", "choices": ["three", "two", "three", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068990.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427445, "question_id": "Rb6zQaAsZiQFYEprZLnd4g", "question": "What could the man's outfit be classified as?", "choices": ["casual", "professional", "uniform", "business casual"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427445.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471527, "question_id": "RdzuYoDZf9ymqw3PzMLpr7", "question": "What is the leftmost letter on the sign?", "choices": ["c", "e", "w", "l"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000471527.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151180, "question_id": "Rh96chhHz6vzrbPSarF8k9", "question": "This dish would be great for those who love what type of food?", "choices": ["vegetables", "seafood", "soup", "popcorn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151180.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 348401, "question_id": "RhSWN2YQDwuinqgehZnVSw", "question": "What is the game the person played in the image?", "choices": ["sliding", "kiting", "flying disc", "swimming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000348401.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459068, "question_id": "RhftGmP7rfixk4Z9HQJxww", "question": "What types of portions are available?", "choices": ["slices", "cubes", "halves", "shreds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000459068.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131177, "question_id": "RiufhPio9fu7wFN8tjzfPH", "question": "The dog is in the middle of being what?", "choices": ["spayed", "euthanized", "declawed", "washed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000131177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 205277, "question_id": "RixbpBaKzfFSpXWtmVXXoy", "question": "In how many seconds will it be 202?", "choices": ["one", "none", "six", "120"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000205277.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397499, "question_id": "RjuLuEozuy3p8orTzyuzd9", "question": "What is traffic sign color for the words in the image above?", "choices": ["green", "orange", "none", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397499.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318674, "question_id": "RkSbwjnqHnxVgTofzCjNmS", "question": "Which of these can be used to refer to the horse's clothing in the picture?", "choices": ["warmer", "turnout blanket", "coat", "horse shirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318674.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453572, "question_id": "RmNDPaGRd5JcT7DJW6X7Ai", "question": "What shape is on the black sign with the white interior and black text?", "choices": ["arrow", "hand", "circle", "triangle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453572.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 497658, "question_id": "Rn2wXU9u5NESQSkb4jDNnu", "question": "What feature does the large item in green and white have?", "choices": ["antlers", "wings", "gloves", "missiles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000497658.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139117, "question_id": "RnFi6ZrC6HayUKunqXSfnU", "question": "How many dogs do you see?", "choices": ["four", "two", "one", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139117.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 233028, "question_id": "RnieHY8YeuQrrLEo34LVWv", "question": "What type of transportation is shown?", "choices": ["water", "air", "rail", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000233028.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 375422, "question_id": "RnrWyKmWkwCsBdiCokdJGh", "question": "Which body part is this person using to communicate?", "choices": ["mouth", "thumbs", "face", "index finger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000375422.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200299, "question_id": "RrRSNSXGUJokpXx2EZrQYX", "question": "How many people can get a canned beverage?", "choices": ["four", "eight", "12", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200299.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206317, "question_id": "RtB7ZMGZZ2hHnYtpUzuEyS", "question": "What is behind the curtain by the door?", "choices": ["toilet", "closet", "shower", "changing room"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206317.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367145, "question_id": "RuCeNp5tXgbckJpjm4yroS", "question": "Where is the person carrying the bag on their arm walking?", "choices": ["park", "street", "pasture", "zoo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367145.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385813, "question_id": "Rv24XFkLNaupMkKQbbuEYo", "question": "What is the person riding on?", "choices": ["none", "car", "bicycle", "skate board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168859, "question_id": "RvcJAdKEc6Y72Dp3XLsVZe", "question": "Which one of these techniques were probably used to attach the design?", "choices": ["decoupage", "pinning", "stenciling", "needlepoint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168859.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169368, "question_id": "RwVMCFUzk5dFv8tat8Dcak", "question": "What is this dog ready to do?", "choices": ["hide", "walk", "eat", "sleep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169368.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50401, "question_id": "RxZzyJ3sqUkZ4KDLEsJW9P", "question": "What expression would one normally expect him to have in this situation?", "choices": ["surprise", "smile", "sadness", "frightened"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000050401.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 60969, "question_id": "RyZUw89BfTj9ziNzHiBmxT", "question": "What is the silver cylindrical object?", "choices": ["end table", "planter", "vending machine", "ash tray"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000060969.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556076, "question_id": "RzJHLZuZcULrVkQZ9TMdaQ", "question": "Why has the yellow bird landed on the green container?", "choices": ["to bathe", "to eat", "to sleep", "to nest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543023, "question_id": "S2AaKxAP3kRpERPvp8WTiw", "question": "What's the plate on the vehicle behind the white dog called?", "choices": ["marked plate", "dinner plate", "license plate", "posted plate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192506, "question_id": "S2T6gWYohEPtSanS7fYaaY", "question": "How much is the charge for using internet connections on this bus?", "choices": ["10 dollars", "5 dollars", "nothing", "1 dollar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192506.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420779, "question_id": "S4WJMAjzcnBj4StAuomKvz", "question": "Why is the large giraffe in with the large one?", "choices": ["lacking space", "easier finding", "people like", "is parent"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000420779.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569650, "question_id": "S5iYPBzaWEjHJXZbnjLkXB", "question": "The person living here likes what best?", "choices": ["premium pricing", "sales", "coloring books", "handwritten only"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569650.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576043, "question_id": "S5qeY3y5RxLJjYT9KVmzxC", "question": "What is the man covering with the book?", "choices": ["groin", "face", "foot", "chest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576043.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214942, "question_id": "S739HKQVdmsuk2YFnpeAgE", "question": "Which part of the sign would someone who can't read English understand?", "choices": ["limit", "any", "speed", "35"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214942.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82660, "question_id": "S75RtUdMD3RfvYCFsDr2MG", "question": "What type of place might this be?", "choices": ["bus depot", "school", "church", "airport"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214321, "question_id": "S7LV7HrDRwuL3KzY8yzYsk", "question": "These animals are known for having what type of diet?", "choices": ["fish", "plants", "fruit", "meat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214321.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399953, "question_id": "S9db2pmpBdw4DyhgBJzRxD", "question": "The black spots on the pizza reveal that it was what?", "choices": ["fire roasted", "oven cooked", "microwaved", "pan seared"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399953.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572692, "question_id": "S9wTVHY7EuN84sboPcwFE2", "question": "About how tall is the tallest giraffe here?", "choices": ["12.5 feet", "12 meters", "12 yards", "12 hands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572692.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122373, "question_id": "SB9hMvdWRU8sk44oVfHQiP", "question": "What area of the body does the colorful item protect?", "choices": ["feet", "neck", "waist", "hands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122373.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10952, "question_id": "SDQTUFNpAiD8iwD8yJPhwQ", "question": "What material is the light brown frame around the clock made of?", "choices": ["marble", "cement", "wood", "bamboo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566733, "question_id": "SDTxcFntSkQ4FWZrk9akED", "question": "Which of these animals is most likely to try to taste the other?", "choices": ["zebra", "none", "giraffe", "pigeon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566733.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81240, "question_id": "SEA2DRLoTaNiVK46S29HiM", "question": "What food group is this pizza topped with?", "choices": ["meat", "fruits", "vegetables", "grains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081240.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177219, "question_id": "SECgxF9T6GAQWfwmX8K9Ki", "question": "Who is the message on the skateboarder's shirt mostly directed towards?", "choices": ["teachers", "parents", "pedestrians", "police"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177219.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87786, "question_id": "SEmrmC9CtjQkmZapwbhk6w", "question": "What country does the war take place in?", "choices": ["canada", "united states", "iraq", "mexico"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087786.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202435, "question_id": "SG5njfB8ogWN3WXWmY5UP9", "question": "What is near the person?", "choices": ["jaguar", "grapes", "plate", "trees"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245044, "question_id": "SGHfktaThGqRGqkVbHCfjP", "question": "What does the store with the sign sell?", "choices": ["coffee", "steak", "crab legs", "pizza pies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000245044.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 203394, "question_id": "SGRwqfY7hhhAQWC3F6pBJ7", "question": "What does the orange numbers mean for riders?", "choices": ["price cost", "year made", "route number", "passenger capacity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000203394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443743, "question_id": "SHJbha7KibYGAsXxwdG6RE", "question": "How much time in seconds is left on the parking meter?", "choices": ["89", "35", "29", "60"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443743.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 343592, "question_id": "SHUdCT7UrLzb4TqP7tDHUC", "question": "Where is this clock adorned building in relation to the location of the photographer of it?", "choices": ["sideways", "behind them", "through window", "frontward"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000343592.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192282, "question_id": "SM5Uo4bUybtdJcu4DZgaag", "question": "Who is allowed to use this bench?", "choices": ["anyone", "females only", "men only", "deer only"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230295, "question_id": "SNg4HbaC3jnEYc6H2T2NAW", "question": "Why is the player using both hands?", "choices": ["stability", "posing and", "luck", "style"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000230295.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512052, "question_id": "SNgEgQFSqPeRNwGrtoZEtc", "question": "What is the woman trying to avoid?", "choices": ["snow", "rain", "heat", "sunshine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512052.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 282817, "question_id": "SPrREtFEXg6SVCJLjwTd84", "question": "Which one of these will probably need to be used on the couch?", "choices": ["vacuum", "broom", "napkin", "paintbrush"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000282817.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102647, "question_id": "SQbuWVXFJVxN8QSjfBhARx", "question": "What is the purpose of the maroon tag on the bear's right ear?", "choices": ["hunting target", "proximity alert", "ownership tag", "conservation tracking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102647.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440585, "question_id": "SQemDujAdhJn8LaFXS35iY", "question": "What is the man using to get around?", "choices": ["pogo stick", "paddle", "steering wheel", "gear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440585.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443037, "question_id": "SRfHNkywy6ajFV6KAEWzPx", "question": "Where do the stairs to the left of the refrigerator lead?", "choices": ["basement", "garage", "roof", "master bedroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443037.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518143, "question_id": "STAUqUhd9xVyRzszdoHTCh", "question": "The scissors seen here are designed for which handedness?", "choices": ["both", "neither", "left", "right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518143.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269713, "question_id": "STNxCUESQMrZZPLtC7guB3", "question": "Sporting item that is generally made of injection-molded plastic is what?", "choices": ["flying disc", "ring", "chain", "kite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269713.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453099, "question_id": "SURfs2m3BzGej9ucMGCCwW", "question": "Why is he doing with himself?", "choices": ["reading", "gaming", "crying", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453099.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 240464, "question_id": "SVTecdcRq8UuJspuKCs5M6", "question": "Where would these items normally be when outside?", "choices": ["in box", "on roof", "on swing", "in soil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000240464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354659, "question_id": "SWAyhPkAsjsCJ3kjiVvUJp", "question": "What TV show might this person have just watched?", "choices": ["cartoons", "sunday news", "snl", "friday fights"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354659.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 348448, "question_id": "SWKrtCwK7jWJ8HCJVhWSWA", "question": "What is the most common ingredient on the pizza?", "choices": ["onion", "seafood", "sweet potatoes", "scallops"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000348448.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30571, "question_id": "SXv2wc2Jzr4zWtH8PZy6fc", "question": "What body part should you use to open the trash can seen here?", "choices": ["mouth", "elbow", "foot", "hand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030571.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54453, "question_id": "SYaJbvjLyY4M7QezJew7SB", "question": "What do the digital numbers mean for this device?", "choices": ["coordinates", "clock", "calculator", "time left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054453.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52908, "question_id": "SZq9VU44FAGJJhjMUbKeMX", "question": "What is a well known flower that is the colour of the things in the background?", "choices": ["dandelions", "daisies", "buttercups", "lavender"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328237, "question_id": "SZqVN2gnsPdAQxuLRv8h9w", "question": "What brand of shoes is he wearing?", "choices": ["adidas", "reebok", "vans", "nike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000328237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548752, "question_id": "SaZTqUGhrKsPn2pEAVjNUf", "question": "Which vehicle here would win a race if going at top speed?", "choices": ["airplane", "submarine", "car", "train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548752.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573302, "question_id": "SbCWxHTjHjkRo3hRkYdcQw", "question": "What type of ball would someone need to play a game here?", "choices": ["basketball", "golf", "tennis", "racquetball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000573302.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119660, "question_id": "ScAefsrUfWgpSRbptFHpgD", "question": "Which cloth material item is most shocking here?", "choices": ["shirt", "tie", "face mask", "pants"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412591, "question_id": "SeBNbpnSdGTNfZxYNZWxC6", "question": "What is missing from the players tennis wear that a majority of the players wear?", "choices": ["wristbands", "head gear", "athletic shoes", "elbow pads"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412591.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558093, "question_id": "Sejo3WdfVdafQZgaLsYL8v", "question": "What type of animal would this creature be most likely to eat?", "choices": ["cat", "dog", "mouse", "kangaroo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558093.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330013, "question_id": "SgMijw9i3f7KjbhWAEjTyY", "question": "What is a probable reason the women is covering her face with the Frisbee?", "choices": ["humor", "ritual", "accident", "shyness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214144, "question_id": "Sgw8ypUDQPaGWY3UCkuVWG", "question": "This girl is playing the same sport as what athlete?", "choices": ["tom brady", "simon biles", "jim edmonds", "maria sharapova"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446528, "question_id": "ShBbQ3PSEhME5KUri7ox6X", "question": "What would the vehicle run on?", "choices": ["solar power", "gasoline", "wind", "cooking oil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446528.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 430000, "question_id": "ShChLjKEyZyJZbCmRBeFSz", "question": "What type of vehicle is trapped?", "choices": ["van", "train", "convertible", "sedan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000430000.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511799, "question_id": "SiUVWcSUMTB5NJjcLPiM7W", "question": "How much in cents does it coat to park for 24 minutes?", "choices": ["30", "25", "ten", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511799.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400498, "question_id": "SjNNy86DsYGBd5gG9ohrB9", "question": "What method produced the spread?", "choices": ["quilting", "crochet", "darning", "knitting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272714, "question_id": "SjgAVZgZzLXcVSWhJTwsXg", "question": "What does this animal like to bury?", "choices": ["collar", "bone", "meat", "vegetables"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245637, "question_id": "SnTQcCGNvoFJdLmP89a9v5", "question": "When getting on his surfboard where will this man's flip flops go?", "choices": ["in pockets", "beach", "thrown away", "on feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000245637.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394107, "question_id": "Sq4qZKprNGmdDMYu6DcTFS", "question": "Who is most likely the batter?", "choices": ["dominic smith", "derek jacobi", "mike trout", "aaron judge"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000394107.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 58010, "question_id": "Sr2hSHTonr4uAdSBbojn7J", "question": "Which one of these diets would allow for this meal?", "choices": ["normal", "gluten-free", "vegan", "low-fat"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000058010.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44789, "question_id": "SuFUDAg6eEe9XViTWLDVdd", "question": "In what way are the pair of people here related?", "choices": ["just met", "mother son", "dating", "siblings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044789.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201975, "question_id": "SubzyZGtyCzh7LHYkDBzLU", "question": "Why is he grabbing the wall?", "choices": ["balance", "finished", "afraid", "getting off"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000201975.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506244, "question_id": "SuohAMiFjSVp7tTrd6GPYu", "question": "Which genus does the fruit in the monkey's hands belong to?", "choices": ["prunes", "vaccinium", "rubus", "musa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506244.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277754, "question_id": "SxSF28kXzD5Yueot6mMbf2", "question": "What animal is on top of the lamp post?", "choices": ["lizard", "dog", "squirrel", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277754.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 129531, "question_id": "SyoiJCNRSKCrutHiGvW5Sx", "question": "What would the man's long flat skateboard best be used for?", "choices": ["tricks", "ramps", "cruising", "park"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000129531.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550668, "question_id": "SzQVN4BSuSLGsxNJtTH7Wg", "question": "What concept is the graffiti artist promoting?", "choices": ["memory", "war", "peace", "love"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550668.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474796, "question_id": "T3rkPftpp93A37FqD2EUXt", "question": "Where is there a fruit mentioned?", "choices": ["street sign", "automotive sign", "library sign", "billboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474796.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303249, "question_id": "T4bBh6diChhkqWMK8GFBr8", "question": "Who is the yellow sign warning?", "choices": ["motorists", "criminals", "police", "pedestrians"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 150171, "question_id": "T4dRnkXsigxpB92WmoQBFe", "question": "The side dish is what type salad?", "choices": ["pasta", "bean", "green", "apple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000150171.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339574, "question_id": "T6PV7hkqohvdsGyAXcSVo6", "question": "What item is missing from his shoes?", "choices": ["soles", "laces", "toes", "heels"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339574.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 416253, "question_id": "T6tbSdaLM4YcLfbHUPaZj3", "question": "Which animal has fur most similar to the pillow?", "choices": ["koala", "leopord", "panda", "iguana"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000416253.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152937, "question_id": "T8Pvy4coxYKGmCnec7HKje", "question": "At what type event is this person staring at this screen?", "choices": ["expo", "dairy show", "sales call", "political rally"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000152937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265323, "question_id": "T8SZFB5kWrzNybHDkJHgVr", "question": "How would one write twenty in this style of numbers?", "choices": ["mxc", "xviiiii", "ivx", "xx"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265323.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554941, "question_id": "T9GSv6XVGCNCXtCCNAP3bD", "question": "What does the animal's feet provide?", "choices": ["faster running", "mating benefits", "stickiness", "locomotion"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000554941.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156644, "question_id": "T9sFXx5sthWZNNAS6CsBqQ", "question": "The cat is hindering what activity?", "choices": ["cleaning", "bathing", "packing", "cooking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156644.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491946, "question_id": "T9voJSRgyi5NZeKGDDzfH3", "question": "Is it later on left most clock or the right most?", "choices": ["neither", "rightmost", "both", "leftmost"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000491946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286047, "question_id": "TFE6R3B2zC2x9JVZtxj7Uf", "question": "The kite shown here portrays a creature that is able to utilize what dangerous element in stories?", "choices": ["air", "stones", "fire", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286047.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114663, "question_id": "TJQkNZa3e2owxhmWwZGntG", "question": "What food items can these animals produce for humans without ending directly in their own deaths?", "choices": ["steaks", "eggs", "bacon", "cutlets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439616, "question_id": "TPyQB3aCPnvMvwhsfyeMx8", "question": "What is needed for this activity?", "choices": ["snow", "water", "ice", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439616.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515437, "question_id": "TQ5Y3UyXnRbTsByUZRv4S6", "question": "Which type of phone is this?", "choices": ["iphone", "lg", "samsung", "motorolla"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515437.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431054, "question_id": "TQLmkY6DteMQCAd74mERB8", "question": "Who or what is closest to the sailboat?", "choices": ["dog", "clouds", "pole", "people"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230794, "question_id": "TSuHfCGEg5ytZiLfFjDhQL", "question": "Why is he wearing glasses?", "choices": ["disguise", "costume", "prescription", "sunshine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 255030, "question_id": "TTYDiTwk8F6WNNY44YqdLH", "question": "Which faucet would you turn to run cold water in this sink?", "choices": ["none", "right most", "both", "left most"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000255030.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381866, "question_id": "TTo7tDV6sYUDDaZUwLuUi7", "question": "What placed the toilets all right next to each other?", "choices": ["human", "gravity", "fish", "currents"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381866.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308239, "question_id": "TVN4DjBsvAdp8ySqFsSKpz", "question": "What is on the horizon?", "choices": ["sand", "water", "trees", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308239.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308682, "question_id": "TX9jcxKtckpwbz7r3LYrmj", "question": "What beverage might the person shown here have soon?", "choices": ["wine", "milkshake", "beer", "coffee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308682.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146967, "question_id": "TYqidsHxBgSKCYQp9ALBZJ", "question": "What does the boy want to do with his ear?", "choices": ["clean it", "paint it", "cut it", "wipe it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146967.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407456, "question_id": "TbUMVipEvroUtwWcRe7C99", "question": "What is this man's profession?", "choices": ["athlete", "cashier", "florist", "pilot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000407456.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99834, "question_id": "TbxcfEzngdhv3XmZMktN8o", "question": "What can be done with the brown handled item?", "choices": ["sand", "nail", "saw", "drill"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099834.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402349, "question_id": "TcUfXXbB2UDWTYMvjeHd4y", "question": "What word do you get if you put the word turn in front of the first word on the top sign?", "choices": ["turncoat", "turnip", "turnpike", "turnstyle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472156, "question_id": "TdmsceriyEqf4JbXKoHoHm", "question": "What is the purpose of the red vest on the man to the right?", "choices": ["fashion", "float assistance", "add weight", "fish deterrent"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000472156.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362250, "question_id": "TfKjLUbvS9pmW76kn3uS5t", "question": "What type of shot is the woman about to hit?", "choices": ["serve", "dropshot", "backhand", "forehand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362250.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555505, "question_id": "TgtLKAZV9cd3ZfDb8w9nB9", "question": "The woman's hair is the same color as what?", "choices": ["strawberries", "snow", "bark", "corn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 516485, "question_id": "Ti8YgPBCUThsVcvEfhNK47", "question": "The lights on the yellow post controls what?", "choices": ["nothing", "immigrants", "traffic", "vandals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000516485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161895, "question_id": "TmAaZudwscZW3sf5Qs2g6U", "question": "What season is it represented?", "choices": ["fall", "spring", "summer", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161895.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535894, "question_id": "TmDh5cnJ7SMe9WjvEk3iBE", "question": "What is stored in the plastic bag hanging from the cabinet door in the rear of the kitchen?", "choices": ["grocery bags", "trash bags", "rubbish", "plastic wrap"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535894.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209167, "question_id": "TmW4wwidQNt7ZV9k7GMCK2", "question": "In which appliance was this dish prepared?", "choices": ["none", "oven", "dishwasher", "microwave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209167.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 181190, "question_id": "To8AQwJTZQmwMVqwwXTY2J", "question": "What kind of activity/verb is done with the toy in the man's hand?", "choices": ["receiving", "throwing", "flying", "sending"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000181190.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377907, "question_id": "ToorJ5gbkN6TSuuDVJ3MLS", "question": "What singer has a last name that matches the name at the top front of the bus?", "choices": ["michael bolton", "elton john", "james hetfield", "billie eilish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105723, "question_id": "TqgFe5BTEyMSQeRhKeTz8e", "question": "What type of sports do the people seem to prefer?", "choices": ["football", "water sports", "hockey", "baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155769, "question_id": "TqhkzQdSnRSjjkhdVUDPyh", "question": "What is an adjective that can be used to describe these objects?", "choices": ["construction", "dull", "sharp", "harmless"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317993, "question_id": "TuddvnurzcAcDGLy44p9fg", "question": "What kind of entertainment is shared by the name of the street?", "choices": ["mmorpg", "manga", "anime", "tv"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545662, "question_id": "TuxYDzbZNpcgYxmvwKoZzz", "question": "How many separate sandwiches are here?", "choices": ["six", "four", "two", "seven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545662.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27661, "question_id": "TvgbaW6sxhKy7mKE8SsxfR", "question": "What place is shown here?", "choices": ["animal farm", "park", "wilderness", "zoo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000027661.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 366580, "question_id": "TwKNxfwYskofYErsH2jWVV", "question": "This paint job is meant to simulate what country's flag?", "choices": ["namibia", "usa", "aruba", "colombia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000366580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 225724, "question_id": "TyLZoThGZJWznrQLjfRufN", "question": "What nationality is credited with inventing this style of neckwear?", "choices": ["armenian", "cypriot", "croatian", "egyptian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000225724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314214, "question_id": "Tz3ZyP9qGKQDDMzJP9hUby", "question": "Where do the horse riders shown here live?", "choices": ["dude ranch", "desert", "farm", "city"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314214.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295871, "question_id": "U3EjqDC2hPsxU4jzGqTjbD", "question": "The large letter on the wall on the left is needed to form the initials of what wrestler?", "choices": ["ddp", "huh", "mjf", "ec3"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000295871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119380, "question_id": "U46BAyVit39KxYKxT44fyk", "question": "What is the shiny appliance used for?", "choices": ["cooling", "cooking", "watching", "calling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119380.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536481, "question_id": "U4CT9VEtoi4GYZFrxUncSy", "question": "If the person living here eats corn flakes what might they use to top the cereal?", "choices": ["bananas", "cat milk", "figs", "pears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536481.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 19171, "question_id": "U5r5nNJdBHJNnFBeK7mKBY", "question": "Why are they facing away from each other?", "choices": ["two males", "arguing", "fighting", "confused"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000019171.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306458, "question_id": "U6KyxBsAa9x7SNf7nHDeR3", "question": "To which direction is this woman headed to?", "choices": ["to left", "to right", "to ocean", "from ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306458.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495777, "question_id": "U6enrXmpTLhUvPCXpQxqs7", "question": "The animal seen here belongs to what type farmer?", "choices": ["cow", "pig", "none", "goad"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279336, "question_id": "U9PKXfMwoUMNAyVvAvx6mu", "question": "Which century were the vases on the shelf probably made in?", "choices": ["1900's", "2000's", "1300's", "1700's"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000279336.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453177, "question_id": "UDVP3KUvSjeyKxnMAgzjLk", "question": "What process was performed to make this cafe?", "choices": ["repurposing", "manufacturing", "building", "excavation"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000453177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100914, "question_id": "UDwizKhHYL9vbbwUqEkR45", "question": "What is shown as two different pieces in the picture?", "choices": ["roof", "door", "picture frame", "wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276241, "question_id": "UFddtWbfsYtQMMjZ6EKe3M", "question": "If this is their natural habitat what continent are they on?", "choices": ["north america", "europe", "south america", "africa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566150, "question_id": "UGiZr5iPYCW2vLTfngHPjW", "question": "What time will the bell ring next?", "choices": ["eight", "two", "six", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90996, "question_id": "UJq6tLEMxbncTb29odEona", "question": "What is the current weather condition?", "choices": ["snow flurries", "rain", "sunshine", "blizzard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 313080, "question_id": "UK8Rw52rkWR3QQ4xc4deFR", "question": "Where should this bear be?", "choices": ["home", "school", "cold climate", "outside"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000313080.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102209, "question_id": "UL7zQreqagv4AQonMVZsUW", "question": "If someone had a rock in their shoe here where should they sit while removing their shoe?", "choices": ["bench", "ground", "no where", "concrete"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172125, "question_id": "ULiA6xZ693NZhn9yaKEb2V", "question": "What sport is being played at this field?", "choices": ["basketball", "disc golf", "cricket", "tennis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172125.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 244553, "question_id": "UMMDNmA7RW6GSfVopWWSBb", "question": "What is the purpose of the devices deployed on the table?", "choices": ["weaponry", "rain shielding", "aesthetics", "solar power"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000244553.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74613, "question_id": "UNMMVh4ufdwqRndukeGCNi", "question": "What is keeping someone from stealing the toilet?", "choices": ["chain", "guard dog", "cable", "lock"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074613.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572020, "question_id": "UP6nqGJSh5zuBL4Rj2iCCS", "question": "What would someone come into this room to do?", "choices": ["laundry", "shop", "urinate", "cook"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572020.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381656, "question_id": "UPijGRSMrG8YjfttCMQm6T", "question": "During which month was this dog sitting here?", "choices": ["july", "april", "december", "march"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381656.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519267, "question_id": "UQ4zTcHnap4e9qjuu4toJ2", "question": "What age feline is the feeding bowl ideal for?", "choices": ["newborn", "elderly", "baby", "adult"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 48798, "question_id": "UQGYNR6sD6csHctcMegs2u", "question": "What is the material of the shorts the children are wearing?", "choices": ["lycra", "nylon", "cotton", "silk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000048798.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175736, "question_id": "US7noQMu4AWDYd9ZHhRPrV", "question": "What object contains liquids to left oils and dirt off of a person's hands?", "choices": ["soap dispenser", "painting", "faucet", "plant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175736.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337764, "question_id": "USjFUDHpzbMGH2pXdHMX8a", "question": "What type of fence is this?", "choices": ["wood", "vinyl", "barbed-wire", "wrought-iron"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337764.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547788, "question_id": "USnSDrumG2tcoSzUhA4nEz", "question": "What is this little birthday girl fond of?", "choices": ["bees", "ladybugs", "dragonflies", "butterflies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547788.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 219584, "question_id": "UUaSdSnY7YKSaoyddPhuqe", "question": "How is the meat cooked?", "choices": ["charred", "medium", "rare", "well done"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000219584.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277349, "question_id": "UVjeqv6dpuahmGfTpD7d92", "question": "Which part of the UK is this in?", "choices": ["north west", "south east", "north east", "south west"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532871, "question_id": "UWcTA7ybc7BkDDV8wFWWaw", "question": "What is the tractor constructing?", "choices": ["freeway", "sea wall", "golf course", "walkway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6592, "question_id": "UXMYoChW2vLEVtCvevPW8p", "question": "What type of wheels does the conveyance shown here have?", "choices": ["rubber", "foam", "steel", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006592.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464205, "question_id": "UXgQd7KosSSzHvYy3spuQq", "question": "What likely caused the image to appear on the screen here?", "choices": ["cat paw", "nothing", "man hands", "automaton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464205.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 167766, "question_id": "UYeSf6YJ3De9cYAodzL7xu", "question": "What is the shape of a hair dryer?", "choices": ["egg shape", "t shape", "gun", "l shape"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000167766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230719, "question_id": "UYkqYq6ELG4fPig83oCVqU", "question": "Which visible item allows someone to hear the train coming better?", "choices": ["boom box", "light", "bars", "bell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230719.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256147, "question_id": "UYtVqy2dUxCKzwhXtwZKGb", "question": "How many plumping fixtures are there in this room?", "choices": ["six", "four", "three", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000256147.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200349, "question_id": "UYxLqfH74CTtWtKfLZuHgC", "question": "What food item is shown that is categorized as something other than plant or animal based?", "choices": ["peppers", "mushroom", "olives", "onion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315614, "question_id": "UZB4uzRafm4LkSsFSBFhPJ", "question": "What color would the blank area above the ground be in a colored picture?", "choices": ["purple", "yellow", "blue", "red"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315614.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 370166, "question_id": "UZhyz7cJ5ofnkKaBh4zaiq", "question": "When will the grass turn green again?", "choices": ["rainy season", "never", "dry season", "snowy season"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000370166.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102747, "question_id": "UZq6kuchWHZQeEPZkdR6ML", "question": "Why is the book open?", "choices": ["trapping dog", "person reading", "random", "reading-to-dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102747.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 316184, "question_id": "UaUKcM2s434P2eaPYMngon", "question": "What is located directly underneath the monitor?", "choices": ["bed", "plant", "food", "animal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000316184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 539625, "question_id": "UbXukCsfdgUafq2em56ky5", "question": "The cat is located on which continent?", "choices": ["america", "europe", "africa", "asia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000539625.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514517, "question_id": "UbgQDAknr6LbfA5BdTR3M7", "question": "In which continent is this stop sign and street lamp located?", "choices": ["australia", "south america", "europe", "north america"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514517.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567860, "question_id": "UdVhsHqwJWk4oWpCmbaCUH", "question": "Which banana is most likely getting touched by the cat?", "choices": ["middle hanging", "left hanging", "table banana", "right hanging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567860.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362160, "question_id": "UfWfyvxuCpnKGy7jjaPQHm", "question": "What is on the floor between the sink and toilet?", "choices": ["basket", "shoes", "egg", "cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362160.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26412, "question_id": "UhoseYNddeKrf5dkWJAH3C", "question": "Stygian is another term of what?", "choices": ["dark", "vision", "light", "mission"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026412.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56697, "question_id": "UiLKRdrehdwK7QRTqLeioC", "question": "Who can be saved by the red ring?", "choices": ["murder victim", "drowning victim", "choking victim", "accident victim"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 233514, "question_id": "Ujd4EucYFe9wJ9hN3V8gNz", "question": "What item would a person most likely use to clean up spilled water?", "choices": ["bowl", "handle", "toilet paper", "wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000233514.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341582, "question_id": "Uk3wTLjoRLuq8gpsanvkQh", "question": "What is a small train called?", "choices": ["small rail", "short rail", "heavy rail", "light rail"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000341582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 208435, "question_id": "UmMYBq97CquxWTyvvBcdyj", "question": "What type vehicle uses the upper deck of this bridge?", "choices": ["trains", "automobiles", "pedestrians only", "boats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000208435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204476, "question_id": "UnfyCAmTjvUdVCYcKJoKwQ", "question": "What sport are they probably doing?", "choices": ["jumping", "jet-skiing", "kitesurfing", "surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227106, "question_id": "UpF8Td9AqvcjuBSXmrqrzk", "question": "What street intersects with slope?", "choices": ["east", "east hill", "hill", "peck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000227106.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 101992, "question_id": "UpTUPJvSYrGDFEkFgT2xuj", "question": "Why does this person have their head covered?", "choices": ["cleanliness", "protection", "religion", "fashion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000101992.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 228141, "question_id": "UqN8dTBHbi9sgkRgPAhWcV", "question": "What scientific table is visible here?", "choices": ["periodic table", "poison list", "wesh barr", "bone chart"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000228141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 165152, "question_id": "UqVbQCrEbTWuJdKEmQ8wBK", "question": "What is the dog standing in?", "choices": ["grass", "water", "dirt", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000165152.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381039, "question_id": "UrBEowAi3iPJsB4X5Wofid", "question": "What is the white object near the black box made of?", "choices": ["wood", "concrete", "glass", "porcelain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381039.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402111, "question_id": "UrBwASGMDY9EsA8LxzQMBs", "question": "Why are theses objects here?", "choices": ["cleaning", "for sale", "discarded", "stolen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 397340, "question_id": "Urev6YDRrPdh87eg43xaCf", "question": "What kind of kitchen is shown?", "choices": ["commercial", "hospital", "residential", "mobile"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000397340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319580, "question_id": "UsB9ZEtky2YLUFZvmKqHMu", "question": "What is the plate made from?", "choices": ["glass", "paper", "wood", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 128026, "question_id": "UsC3e7mU6xaDsuyfPJQcCv", "question": "Why is the cat's face brighter than the rest of its body?", "choices": ["coat coloration", "recently cleaned", "camera flash", "breed aesthetics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000128026.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45275, "question_id": "Ut4F9eavCmrLcw2fpYujVV", "question": "What type of customer is this donut designed for?", "choices": ["child", "businessman", "senior", "woman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045275.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 70538, "question_id": "Uvjf2TnNSmeKc2D9DGaFz8", "question": "Why is the dog on the bow?", "choices": ["lost", "likes it", "running away", "leashed there"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000070538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 460957, "question_id": "UwTwP4SgUfuTHs7dqWppFv", "question": "What motorized object is the man about to pass?", "choices": ["bus", "rv", "truck", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000460957.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317810, "question_id": "Uwhcg94qFR4Vtwa9KAy6PS", "question": "The 40 on the sign refers to which one of these units?", "choices": ["hours", "miles", "ounces", "gallons"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317810.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318732, "question_id": "UwpHrRtmdNsSpcdRj9DipR", "question": "Where was this located?", "choices": ["grocery store", "house", "hospital", "restaurant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318732.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 134504, "question_id": "Uyj2j7CHWZMcNeZeDFgTwf", "question": "What would you place in the black bin on the left wall?", "choices": ["towels", "water", "clothes", "garbage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000134504.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345765, "question_id": "Uymm6WEH4fX9smrrEdzCEf", "question": "If you ran out of toilet paper whom should you call?", "choices": ["front desk", "grandmother", "wal mart", "neighbor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 523969, "question_id": "Uynd2FANXi3E8yfUh9qupr", "question": "What type outdoor activity is being held here?", "choices": ["clown parade", "military tribute", "rodeo", "fair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000523969.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471657, "question_id": "UzXta4jC9mxZjSGGGo6c5y", "question": "What is the only type of vehicle permitted in the lane between the traffic lights?", "choices": ["motorcycle", "sedan", "train", "bus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000471657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537076, "question_id": "UzgYLWe8qmTAg3oWrNPLRU", "question": "What element is the person using to move?", "choices": ["water", "wind", "fire", "earth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525683, "question_id": "UzjaJsKKoNbH9R6zsBMhuA", "question": "What does the item in the child's hands help with?", "choices": ["diving", "snorkeling", "swimming", "surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525683.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114270, "question_id": "V2A4PjiPBaoZPt5EMpV2fe", "question": "This structure allows people to travel where?", "choices": ["by air", "through mountains", "across water", "underwater"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000114270.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191550, "question_id": "V2iyLnHGxXy8vWouLQcxY2", "question": "What food group is shown on this vase?", "choices": ["grains", "vegetables", "fruits", "dairy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191550.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288038, "question_id": "V33KSrbLFFCP22V52VaJkn", "question": "What activity is the above woman doing on the vessels?", "choices": ["decoration", "coloring", "breaking", "molding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288038.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537004, "question_id": "V3e6iMcHd9zy38QsoMca6o", "question": "The white protrusions are made of what?", "choices": ["chalk", "ivory", "marble", "opal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537004.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549791, "question_id": "V3pWHv5L3xBTYgAftjZpB4", "question": "What can be said about the zebras snouts?", "choices": ["smooth", "hairy", "monocolored", "clean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549791.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29167, "question_id": "V4HyLvYYK59wAyyoQTjcqT", "question": "What type of shot is the woman about to hit?", "choices": ["forehand", "backhand", "serve", "slice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029167.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409552, "question_id": "V4WerYTB3NDjRkufQpKgEq", "question": "What is the main ingredient in concrete?", "choices": ["rock", "lime", "sand", "silt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409552.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8046, "question_id": "V4mTaU92CAZsH8dPENH5wP", "question": "What is on the woman's lip?", "choices": ["ring", "frosting", "cake", "knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008046.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193353, "question_id": "V5SpqKACrXqKFN7mLjzMRz", "question": "This animal's young is referred to as what?", "choices": ["kid", "puppy", "calf", "kitten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 115557, "question_id": "V5XmnDYEE49hfvHPypNguZ", "question": "What type of event is this cake being served at?", "choices": ["christmas party", "wedding", "retirement party", "birthday"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000115557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393920, "question_id": "V6DBi3bwAcdBMAyyVcFZUv", "question": "What is the new name of this airline?", "choices": ["caribbean", "emirates", "latam", "delta"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393920.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 247147, "question_id": "V6NZSqtGCqUuaNqzsUCfdB", "question": "The clock tower is experiencing which season?", "choices": ["spring", "fall", "winter", "summer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000247147.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221447, "question_id": "V6cW74RvoS2QosVsP6f6Vf", "question": "If this animal is attacked which feature might do the most damage to the attacker?", "choices": ["bell", "tail", "horns", "ears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147244, "question_id": "V8uWoezDxVJzMfz2JM8wYK", "question": "What is this person doing with the red stuff?", "choices": ["brushing teeth", "shaving", "eating lollipop", "pulling tooth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147244.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206959, "question_id": "VBVAMpTHvE8DRYb7UnpKoR", "question": "What part of the larger animal is fully hidden?", "choices": ["quill", "body", "face", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206959.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 88535, "question_id": "VCP9bxpvZGnBW7EkUSDpDX", "question": "What genus does the plant on her pants belong to?", "choices": ["pimenta", "ribes", "malus", "prunes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000088535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382562, "question_id": "VCbPZbdjTRj6xnr7waE7zd", "question": "What is happening on top of skyscrapers shown here?", "choices": ["construction", "sunbathing", "advertising", "deliveries"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382562.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25856, "question_id": "VCnDrfNZ4WkdtThifnQRrA", "question": "What material is the cover of the phone made of?", "choices": ["marble", "jewelry", "plastic", "gems"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025856.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 450809, "question_id": "VDUGWoYG4VhZuUPQsbgL5c", "question": "What are these zebras trying to do?", "choices": ["eat", "swim", "bathe", "drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000450809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 771, "question_id": "VELAQ6kzDctwywE3MauUHr", "question": "Which musical artist is the graffiti on the stop sign referencing?", "choices": ["eminem", "r. kelly", "mc hammer", "drake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000771.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263808, "question_id": "VGJ9v73oa3qLyGnHCd6Jg7", "question": "People most likely come to this building to do what?", "choices": ["shop", "learn", "date", "heal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263808.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206124, "question_id": "VGSFQcdPWJKoBYCCNjppdc", "question": "What stops kites like these from flying away?", "choices": ["hopes", "snow", "rivers", "string"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206124.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495873, "question_id": "VGkG3Ki77nafeZYmtsgVEU", "question": "Nintendo is a developer of what?", "choices": ["lens", "wii", "microphone", "speaker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495873.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442805, "question_id": "VHYFJsRvUep2UC5VafLv4L", "question": "On what type of furniture is this cat bed located?", "choices": ["chair", "hutch", "table", "bookshelf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442805.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160959, "question_id": "VJf97ohMpELiCmmDFqeKhh", "question": "How will this horse cross this river?", "choices": ["under water", "fly", "walk across", "backwards"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160959.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 96337, "question_id": "VJii9v54yvDkR9trGzjj2D", "question": "Why are the flowers opening?", "choices": ["scared", "dying", "eating", "blossoming"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000096337.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 278407, "question_id": "VJx94Y7KbSzUrcv85575wT", "question": "How many nearby devices can be controlled remotely?", "choices": ["five", "two", "four", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000278407.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471509, "question_id": "VKtKSbQE3vZamuJd7YnfgN", "question": "Why might the items be lined up on the floor?", "choices": ["to vacuum", "to play", "to redecorate", "to game"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000471509.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511667, "question_id": "VLskhjZj3bPK7LfcjBgR3o", "question": "How many people likely share this bathroom?", "choices": ["five", "ten", "2 dozen", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511667.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488813, "question_id": "VLtuaSwATKANckQFyiqHED", "question": "What keeps these horses in place?", "choices": ["wooden fence", "nothing", "barbed wire", "electric fence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107717, "question_id": "VMqWT8GYFyRUeuSW7pXqjD", "question": "When the clock shown is working what movement does the pendulum at the bottom show?", "choices": ["vibrating", "none", "swinging", "up down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107717.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85497, "question_id": "VPgfthT54eK3oz579SANk5", "question": "What century does the phone look like it is from?", "choices": ["22nd", "15th", "16th", "20th"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085497.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186575, "question_id": "VUQbsiKQ2fhtvPVWao6a3L", "question": "What activity is being performed by the central animal?", "choices": ["mounting", "hunting", "running", "grazing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186575.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303200, "question_id": "VXKk39BZf8WKJDbShwyy4g", "question": "What is inside the animals on display here?", "choices": ["blood", "stuffing", "bear guts", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303200.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273720, "question_id": "VXXV8TwVpxKDmd423FpmPg", "question": "He is wearing this accessory because of poor what?", "choices": ["eyesight", "speech", "taste", "hearing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273720.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377470, "question_id": "VXa4GsbsahA58FoYPhkeco", "question": "What animal is most similar to the animal here?", "choices": ["tiger", "ant", "monkey", "toucan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377470.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519914, "question_id": "VZePuunjPjyrDnKffDPm55", "question": "This intersection is in what metropolitan area?", "choices": ["vancouver", "toronto", "chicago", "seattle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467608, "question_id": "VcbAWHrutMbaNSFvRdciT2", "question": "What type of animal is hugging the teddy bear?", "choices": ["alligator", "camel", "cat", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456533, "question_id": "Vf6pnSpKMsgfHzRoepdHky", "question": "How did the person highest up get up there from the ski lift drop off point?", "choices": ["uber", "flew helicopter", "separate lift", "skied"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 392676, "question_id": "VfHZboGRBxCweS8o8mKxqT", "question": "What group is known for using this vehicle?", "choices": ["tank commanders", "newsies", "bikers", "babies"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000392676.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345199, "question_id": "Vh4yvHtMNdpFg4px3s8XwJ", "question": "From which position could you most easily flush this toilet?", "choices": ["standing", "outside room", "sitting", "lying down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345199.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439498, "question_id": "ViRqPhqosZeSktB3mDTTn6", "question": "What is the profession of the person who sells the item for the vase?", "choices": ["plumber", "baker", "florist", "barber"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439498.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472712, "question_id": "Via5P4PkoKgzJJpQrZo7Wo", "question": "What bird name matches the name of one of these streets?", "choices": ["hawk", "emu", "canary", "eagle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472712.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36008, "question_id": "VjQ5K9dmR9cBehNkfdErRP", "question": "How many zebras do you see?", "choices": ["eight", "seven", "six", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 197303, "question_id": "VjdTt5EqVwcZQt2R66S8dV", "question": "The stuffed toy that the sleeping child is holding is from which popular video game series?", "choices": ["portal", "mario brothers", "sonic", "final fantasy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000197303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2793, "question_id": "VjgVatnF6xjAfF9XZiXRGK", "question": "After biting this dog What will this person next consume?", "choices": ["lemon pie", "beer", "veal", "soda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002793.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555232, "question_id": "Vjv5TtbvSavsb37jaE8Ztn", "question": "What does the adage say will be kept away if one eats these fruit?", "choices": ["constipation", "acne", "cancer", "doctor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555232.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360583, "question_id": "VnmP3hcfY3Vg832nobM9mj", "question": "What activity is the man performing based on his equipment?", "choices": ["channel surfing", "singing", "video games", "dancing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000360583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17978, "question_id": "VomdFSbs79LnC4o5kggAT8", "question": "Why are the feed baskets so high up?", "choices": ["keep clean", "hiding", "easier filling", "for giraffes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017978.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288261, "question_id": "Voq45S7XwZkok3yHSAz9zj", "question": "What entity might be most likely to own or control these vehicles?", "choices": ["military", "toy", "shipping vessel", "transport company"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 332641, "question_id": "VpbURRkRZVyoTaoBZT55qJ", "question": "The pastry seen here is of what size in relation to typical ones normally sold?", "choices": ["same", "thinner", "smaller", "large"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000332641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533340, "question_id": "VqNxB62DKv4PNVfJVGQkVN", "question": "What does the woman have around her neck?", "choices": ["scarf", "jewelry", "tattoo", "collar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355669, "question_id": "VqS6mbn5NuDEJMYkXbZxST", "question": "What is the player's position called who is wearing green?", "choices": ["goalie", "point guard", "leader", "forward"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355669.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192857, "question_id": "VsfWv2octsFpxbECyTfqDk", "question": "What is the green item known for?", "choices": ["vitamin w", "msg", "vitamin k", "soy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192857.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309486, "question_id": "Vsu7f2wPNF8GirTKrMK34t", "question": "What is needed for this activity?", "choices": ["road", "wire", "rope", "ramp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309486.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157683, "question_id": "VsxEieuRZyHPKwdx5ysoyM", "question": "Where would a person grab to use the object on the left side?", "choices": ["blade", "board", "handle", "pizza"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157683.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186746, "question_id": "Vt5XcQmaybRoW5jY88KtHC", "question": "The player and ball are most likely in a position for what tennis situation?", "choices": ["net return", "baseline return", "chip shot", "serve"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186746.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467093, "question_id": "VwT8joQ9SBrksQxqyAPUjH", "question": "What is the main diet of the bear that is depicted here?", "choices": ["acorns", "bamboo", "maple", "eucalyptus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467093.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 322310, "question_id": "VwvYtgzq3UNjg2Ypfe3C35", "question": "What kind of training is the young person here getting?", "choices": ["potty", "cleaning", "reading lesson", "singing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000322310.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400452, "question_id": "Vxy24xf4sGW5RWXyYTsPSz", "question": "What kind of object is the food inside?", "choices": ["pot", "skillet", "frying pan", "platter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503578, "question_id": "VypSKWdEdYQ7RHdSc2GuJ8", "question": "What purpose does the silver object on the man's arm provide?", "choices": ["nature", "weapon", "space", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 496071, "question_id": "VzB3vExHEbZgsUdvjpFVci", "question": "What is the outer part of the item on the plate called?", "choices": ["core", "rind", "crust", "stem"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000496071.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196303, "question_id": "W2W9skQm5RNti7qvtWCSLH", "question": "What kind of party is being shown?", "choices": ["birthday", "tea", "surprise", "wedding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187851, "question_id": "W3JZuYmjbcdErRoLDWRmRz", "question": "Why would someone sit at this table?", "choices": ["to paint", "to work", "to eat", "to saw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000187851.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188155, "question_id": "W3WBUYfcb4UQeJQJScGqpg", "question": "What is this bathroom missing?", "choices": ["sink", "mirror", "shower", "bathtub"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188155.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472385, "question_id": "W3YfR9fpiwf3Xrrzk2yyC2", "question": "Why is the seat and tank black?", "choices": ["for sale", "fresh paint", "colorless photo", "matches curtains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472385.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25750, "question_id": "W479QAusC8qHKPxq8rqReA", "question": "How long will the flower pedals continue to appear alive in the bowl?", "choices": ["one year", "three months", "three weeks", "three days"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025750.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303952, "question_id": "W5b7DHdXWtW3BDhTHKsq9r", "question": "What does the white street sign warn about?", "choices": ["traffic delays", "driving speeds", "road blocks", "hurricanes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303952.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391208, "question_id": "W7wSusYpGGZoUwCQteQVbS", "question": "What is a baby of this animal called?", "choices": ["puppy", "doe", "calf", "kitten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 388833, "question_id": "W83XaiXZtNsbavojsyKimM", "question": "What food group is pictured?", "choices": ["dairy", "vegetables", "fruits", "grains"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000388833.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326361, "question_id": "W8kRdiPdo2HDA5DjNK6R7B", "question": "What is this dog trying to do?", "choices": ["rest", "attack", "play", "run"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326361.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290777, "question_id": "W9nm9Sv5XyQ5yEhKvpPkQF", "question": "What would cause the bird's eyes to be closing in this situation?", "choices": ["fear", "hunger", "boredom", "fatigue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290777.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160127, "question_id": "WA69otgGmkMwsbwL9hiHmg", "question": "What is the primary food source of the animal in the picture?", "choices": ["elephants", "lions", "fish", "grass / leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000160127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301217, "question_id": "WAH5JPxsGVjnKxHzDb46h2", "question": "What must be distilled to get the item in the clear bottle?", "choices": ["water", "oil", "grain", "milk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301217.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 85609, "question_id": "WBdL59KiAfk9xV3jjV9XM7", "question": "Where is the most likely place one might see this type of bus?", "choices": ["display", "highway", "downtown", "country side"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000085609.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 210479, "question_id": "WC9pL7hcDj5pQUzK6P7VaA", "question": "What would cause a person in a car to get a ticket very quickly in this area?", "choices": ["accelerating", "turning", "honking", "stopping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000210479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232854, "question_id": "WE3JNjDvH5mQZzrtwgwpBP", "question": "Why is she using an umbrella?", "choices": ["snow", "disguise", "rain", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155827, "question_id": "WEPDNK5ExqniSQu9ozhrdu", "question": "What is the likely gender of the person above?", "choices": ["female", "child", "boy", "male"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155827.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461064, "question_id": "WEZrqdkF6EooicHQkSR7PG", "question": "What material is the black latticework surrounding the clock constructed out of?", "choices": ["steel", "aluminum", "copper", "iron"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575195, "question_id": "WGgvVf8tgftrpqoiGQL8YL", "question": "Why does the man have a strap around his leg?", "choices": ["save surfboard", "agility", "speed", "balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575195.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 467058, "question_id": "WJziExLo4k3cKkshDGGkJY", "question": "Where is this game being played?", "choices": ["mud", "field", "court", "grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000467058.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473033, "question_id": "WNFxFjHBNtonFHEpRydp5N", "question": "What information is provided?", "choices": ["location", "date", "temperature", "speed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473033.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 145973, "question_id": "WPqwDnNPP5rZStq3Pd6DKK", "question": "In which state is this roadway located?", "choices": ["iowa", "minnesota", "idaho", "indiana"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000145973.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442079, "question_id": "WQTJt94jSGqZsYUFStviBF", "question": "What will they do to the boat after using it?", "choices": ["slash", "give away", "deflate", "sink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442079.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 37146, "question_id": "WQuQ5FikEoAHRgG6qcxAdR", "question": "What type of material would the scissors be best for cutting?", "choices": ["cardboard", "leather", "paper", "fabric"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000037146.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 84601, "question_id": "WRgdCPqjTyjBGgo33ZPiVE", "question": "For what season is the stuffed animal shown here dressed?", "choices": ["july 4", "easter", "christmas", "halloween"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000084601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 322706, "question_id": "WRjfg8A6T4xxj4coqtQkWr", "question": "What is a possible hazard to the skateboarder in their current location?", "choices": ["sign", "bollards", "trees", "cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000322706.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427222, "question_id": "WSvqQAdqpe9aKCm6GDLWJk", "question": "What does the cat here have it's eye on?", "choices": ["photographer", "interlopers", "tv", "outer wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427222.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74459, "question_id": "WTK2GxQdGdvSDb4vBos2EZ", "question": "What is being used to move the board through the water while standing?", "choices": ["feet", "paddle", "paws", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104350, "question_id": "WUH4Snhap6QSTsrfisXQto", "question": "Where does this buses route lie?", "choices": ["usa", "no where", "england", "perimeter field"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000104350.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171922, "question_id": "WV4PTpVQVukxiY9ZRMJLaq", "question": "What fluid is found inside the individual wearing a bracelet?", "choices": ["blood", "stuffing", "soup", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399429, "question_id": "WVGFUTuWUG5wSVKHd2gsWU", "question": "What is the zebra doing?", "choices": ["hiding", "mating", "foraging", "fighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000399429.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 332107, "question_id": "WVP7eZi3Ser5vnLYePUuUi", "question": "What kind of phone is being used?", "choices": ["landline", "cellular", "corded", "rotary"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000332107.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404932, "question_id": "WVQAGM8a86aU7W6prb8ncn", "question": "What will likely happen now?", "choices": ["freeze food", "cook food", "clear plate", "take bite"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147757, "question_id": "WWUt2odVeBgxEAKnszL6x7", "question": "What are the large towers on the left usually called?", "choices": ["sky elevators", "zip lines", "moving towers", "ski lifts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135654, "question_id": "WX7okqDe9WJZ4ftdZQUCdp", "question": "Which one of the following establishments often has fixtures like this?", "choices": ["auto store", "department store", "furniture store", "convenience store"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135654.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440961, "question_id": "WXe3x93d4V79SSJLAFwtYh", "question": "Persons driving here should watch out for what?", "choices": ["nothing", "children", "senior citizens", "mountain goats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440961.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286867, "question_id": "WXiEx8DHNqP3qta3iZpshg", "question": "What is the boy with the board about to do?", "choices": ["jump in", "go home", "eat lunch", "clean up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000286867.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86634, "question_id": "WY8xWKSruvdYDEPjXXSR4B", "question": "Which part of the toy is probably the hardest to the touch?", "choices": ["eye", "nose", "ears", "stomach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086634.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376212, "question_id": "WZzpW9TB6QABG7zDRmjnxR", "question": "What team plays in the state that is mentioned on the banner?", "choices": ["longhorns", "pirates", "nuggets", "mets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376212.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271062, "question_id": "WaCJtmVTqCD5MqwSPnLCxg", "question": "What type of toy is this?", "choices": ["stuffed", "electronic", "educational", "infant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000271062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2298, "question_id": "WaPFRsF3ZPyAbQ4EihUjL8", "question": "What do the woman's socks represent?", "choices": ["nature", "bear", "fashion", "lgbt pride"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002298.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258997, "question_id": "WamWGASc8spg45VMuNMm4g", "question": "What is near the controller and the hand?", "choices": ["black cat", "banana", "toe", "pizza"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000258997.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279336, "question_id": "WdasXaTjABQp2Y8mcucwSw", "question": "What is the surface the urns are sitting on made of?", "choices": ["cork", "granite", "marble", "porcelain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000279336.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51888, "question_id": "WeVbwJzj4HwaS7etUR8PDE", "question": "What can you see beyond the water body?", "choices": ["animals", "birds", "nothing", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051888.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231960, "question_id": "WfR5HdN9xEHUhWU753uZqY", "question": "The man's face indicates the boat could be doing what?", "choices": ["jumping", "stopping", "slowing down", "speeding up"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231960.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 228406, "question_id": "Wfo56P9WK5wE68gUtJpSXF", "question": "The upper part of this appliance will keep your food?", "choices": ["warm", "hot", "cool", "frozen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000228406.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 335067, "question_id": "Wg8Ri3fY6xgpAYhU9j7AhK", "question": "In which location is this clock likely positioned?", "choices": ["athiests hall", "mall", "church", "mcdonald's"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000335067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138657, "question_id": "WgbJeJ9LJpiPGYUoX7E2Wk", "question": "Which part of the race did the biker just pass?", "choices": ["start", "middle", "finish", "lap 1"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138657.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127016, "question_id": "Wgn4Xzm7TEUzWpGdpBivwP", "question": "What type of sign is on the bench?", "choices": ["sale", "promotional", "memorial", "directional"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127016.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288960, "question_id": "WhRrRqq5XssjuTLy44dg32", "question": "What items are sold inside this shop?", "choices": ["faberge eggs", "milkshakes", "magic tricks", "rugs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288960.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507336, "question_id": "WkZLGvP3Uxm3bMYx9d6WyL", "question": "What game does the owner of this computer imagine they play best?", "choices": ["checkers", "cricket", "rugby", "warcraft"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507336.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 101600, "question_id": "WnKjAopKMcqV2dqWjomtB9", "question": "Why is the small dog on the couch?", "choices": ["to hunt", "to play", "to sleep", "to eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000101600.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422363, "question_id": "Wppe3s852FaX7NCBzRnszF", "question": "What has to be utilized to wear these pierre cardio items?", "choices": ["clasp", "nothing", "knot", "soda"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000422363.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212971, "question_id": "WqVVuywBayPGkspFTjgjvL", "question": "What is the Volkswagen symbol?", "choices": ["ww", "vv", "vw", "bmw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000212971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74889, "question_id": "Ws2wY8AHTPb8SYYEMttwFr", "question": "Which TV network's logo is in red above the bus?", "choices": ["cbc", "cnn", "cbs", "fox"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074889.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154840, "question_id": "Wsk6bDvcZkeKvPyueeUHnN", "question": "What might most damage this building soon?", "choices": ["sun", "lightning strike", "snow", "dew"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154840.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 408741, "question_id": "WspMQrwhPJCisHpjPhdF2m", "question": "What is required for this activity?", "choices": ["rain", "wind", "sun", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000408741.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396875, "question_id": "WvgLQWU9QA7GkbEjgESdvj", "question": "What do the symbols here signal?", "choices": ["pineapples", "music", "drama", "apples"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396875.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34066, "question_id": "Ww3VPWxbWHasMRshfGiRPg", "question": "What aspect of this photo makes it most ironic?", "choices": ["window", "animal planet", "yellow cat", "white cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000034066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11521, "question_id": "WwYb6XGf4KutsuZRpurgMy", "question": "What is keeping the camera attached to the front of the surf board?", "choices": ["rope", "gravity", "mount", "tape"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000011521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401129, "question_id": "X2t8H4GEyka4qkmA2YesXt", "question": "This animal has what feature?", "choices": ["gills", "stinger", "trunk", "feathers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401129.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30237, "question_id": "X32VQea2jnBchgUkBkdb5K", "question": "What company provided this blow up to this man?", "choices": ["crest", "arm hammer", "colgate", "ball's gags"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56177, "question_id": "X4iVPJeGiSf5trqkv8XCaE", "question": "What foodstuff can be found growing on the long brown thing in the foreground?", "choices": ["mushrooms", "eggs", "steak", "honey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484106, "question_id": "X53L8fXHx6xtFvr69oufEg", "question": "For what this Wii is used?", "choices": ["movies", "gaming", "audio", "music"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000484106.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 496071, "question_id": "X6KpZnqEcWQqYUtQV4YV8Z", "question": "These fruit are rich in which one of these nutrients?", "choices": ["vitamin b12", "fat", "protein", "vitamin c"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000496071.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 538241, "question_id": "X73e4WWh66Lvd4Fb55WFVd", "question": "What type of transportation is used here?", "choices": ["rail", "air", "land", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000538241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277840, "question_id": "X8k2H6yjDuMccGmbYNnHVA", "question": "What is the man trying to gain on his throw by jumping in the air?", "choices": ["distance", "aesthetics", "accuracy", "pride"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277840.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573036, "question_id": "XBy5XJBhLNNVbS4Ljcahhd", "question": "What is the disk made from?", "choices": ["steel", "plastic", "marble", "wood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000573036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354858, "question_id": "XCGv2VyEpEm63PAyRUMAvv", "question": "Why are the elephants here?", "choices": ["thirsty", "lost", "hungry", "lonely"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354858.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25927, "question_id": "XCPJ6ePUks23bBUukxFAw6", "question": "What country's flag is on the fender?", "choices": ["united states", "brazil", "canada", "france"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000025927.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371707, "question_id": "XFRGdLF7H4HRpG4or7DwNw", "question": "Why does the girl have her arm in the air?", "choices": ["reach", "break fall", "wave", "balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289851, "question_id": "XK7gPW2hPBjHn3gQPZTPiS", "question": "This appliance is able to preserve what long term?", "choices": ["nothing", "chilled meat", "dairy", "frozen items"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000289851.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24286, "question_id": "XKTEHX7i5VAxguAmh3s537", "question": "The tourist riding on top of the elephant is visiting which country?", "choices": ["laos", "vietnam", "china", "thailand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418241, "question_id": "XQoW4dMtaCV4AL7nGA5LgA", "question": "Why is he wearing a glove?", "choices": ["grip", "warmth", "fashion", "health"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418241.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183879, "question_id": "XTW4YJz9xQwN97EXWAi783", "question": "What is the purpose of the green knit pouch next to the iPhone?", "choices": ["phone holder", "phone warmer", "coin purse", "wallet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183879.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 464758, "question_id": "XUKw3yJjyK2YKniAnRcpFv", "question": "Citizens can use the nearest structure for what?", "choices": ["living", "gardening", "swimming", "rest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000464758.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86347, "question_id": "XVMsjQuR9bF2dwgUFGVX6S", "question": "What pattern is painted on the planes?", "choices": ["stripes", "camouflage", "stars", "plaid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38759, "question_id": "XVkf4ESet677Z9v6ogFkRS", "question": "Where are the yellow vehicles taking the passengers?", "choices": ["to library", "to jail", "to hotel", "to school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038759.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357986, "question_id": "XVpBVwn5Ry4RKaSrudF9Fw", "question": "What will this truck be hauling?", "choices": ["cars", "coal", "animals", "people"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357986.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79717, "question_id": "XXWhTXoEiXcaLym3mSpGcv", "question": "What type of material is the train riding on?", "choices": ["railroad track", "glass", "sand", "water"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000079717.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196088, "question_id": "XXxH9TXRiLBewzqJjFe9xH", "question": "How many people are flying in visible planes here?", "choices": ["two", "four", "eight", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196088.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384144, "question_id": "XYuKXmwWuB8zTNvBaGsJJg", "question": "Under which adverse weather event does the pole on the fire hydrant become useful to locate it?", "choices": ["snow", "hurricane", "thunderstorm", "flooding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 287787, "question_id": "XYzEHmtDeWqog7P8enHihm", "question": "What is required for this activity?", "choices": ["wind", "rain", "snow", "ice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000287787.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98625, "question_id": "Xa3Q5Yq45JPQwzSAwFZHQz", "question": "What is she ready to do?", "choices": ["swing", "sprint", "dunk", "dribble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098625.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435594, "question_id": "Xam8pD25WnhFrthHPxj8We", "question": "Where are the animals above grazing?", "choices": ["pegging", "in house", "paddocking", "open field"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435594.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552386, "question_id": "Xbto72qdo2mWYcRJBwQRoJ", "question": "What sport does the flag represent?", "choices": ["tennis", "cricket", "golf", "soccer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552386.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521174, "question_id": "XcqJoLYZybr6XtyaNQYVot", "question": "What does this animal use for defense?", "choices": ["poison", "claws", "quills", "stinger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521174.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32306, "question_id": "XczBWonqqmJiG9AwTjtKzK", "question": "Why is the umbrella inside out?", "choices": ["optical illusion", "water", "design flaw", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032306.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569297, "question_id": "Xd7ddq6v7Ru8KxMXGU9css", "question": "What is the name for the human like machine on the boys umbrella?", "choices": ["robot", "terminator", "mummy", "clone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569297.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491404, "question_id": "Xf9BnDMurvBBbmxn7PLknn", "question": "What language does the name for the red vegetable derive from?", "choices": ["nahuatl", "guarani", "cherokee", "ojibwe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000491404.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578460, "question_id": "Xg2Tdp9EpeuWQX5CDgEBL6", "question": "What made the umbrella wet?", "choices": ["brook", "rain", "snow", "ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510415, "question_id": "XgvgtF7wWhyEyrquSgjQeP", "question": "What type of vegetable is inside the rings?", "choices": ["pumpkin", "squash", "lettuce", "onion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000510415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515528, "question_id": "XiX3NFJVNh5DqxDwiwTKKU", "question": "What breed of dog is on the television?", "choices": ["great dane", "bassett hound", "doberman pinscher", "german shepherd"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515528.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341582, "question_id": "XjJ8t6KiUEoptfc7SS4rpC", "question": "What is the roller coaster riding on?", "choices": ["railroad track", "water", "sand", "street"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000341582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432251, "question_id": "XmrvZdXMKPrqetS5Yq7Cmk", "question": "Which object besides the cat would be used for a television?", "choices": ["food container", "remote", "food box", "food packet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432251.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 125074, "question_id": "XmxSfoBNPGKnznEvyxVoGf", "question": "This company is famous for making what commodity?", "choices": ["shoes", "toothpaste", "computers", "watches"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000125074.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 247558, "question_id": "XorZ5nh6dZ53DF4eb6Lc6F", "question": "How was the metal used to make the numbers here first changed to form into them?", "choices": ["ordered", "melted", "written", "carved"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000247558.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169338, "question_id": "Xp6PggK6PEtSdebEAqsD9z", "question": "Where is the computer probably located where they want to know someone's card number?", "choices": ["work office", "college", "computer store", "library"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169338.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515154, "question_id": "Xp9ApPAphWuVFxoHRBp7D3", "question": "What type of material is stored in this object?", "choices": ["dirt", "water", "ice", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169338, "question_id": "XpRNeMNMNdxywH52WnJtRg", "question": "What is the person attempting?", "choices": ["hidden level", "high dive", "pole vault", "log in"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000169338.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317810, "question_id": "XpUgEn333mnGVuvaTT4wtp", "question": "What type of sign is this?", "choices": ["brand", "traffic", "price", "directional"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317810.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301533, "question_id": "XpdkAHXjpDbNRbYNxVyssG", "question": "What vegetable makes up the reddish hot dog condiment seen here?", "choices": ["mustard", "beets", "potatoes", "tomatoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 255511, "question_id": "Xpk2Z6zNawdrGcMDzyNWyE", "question": "What type of surfboard is the man carrying?", "choices": ["longboard", "hybrid", "fish", "bodyboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000255511.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 82993, "question_id": "XrvfpaFNPQ55252wPSciJY", "question": "What kind of transportation is shown?", "choices": ["air", "water", "road", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000082993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543394, "question_id": "Xtnoz7SmEHWLexZj3vRdkT", "question": "What does the text surrounding the clock depict?", "choices": ["country", "advertisement", "clock name", "city"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265548, "question_id": "Xu3GbX86xkVwtNLyKofpm8", "question": "What type of material is called?", "choices": ["glass", "marble", "plastic", "stone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265548.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572651, "question_id": "XuVApK2Y8vLdhuKGQyDDy4", "question": "How many different names does this street have?", "choices": ["two", "ten", "three", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572651.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263946, "question_id": "XxmQfLngebBZhxtti4CCKF", "question": "How do the people here propel themselves forward in water?", "choices": ["paddles", "sea mammals", "sail", "motor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104882, "question_id": "XxyyN3guAqQRcEeufXymGX", "question": "What type of location is this?", "choices": ["barn", "hospital", "station", "church"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104882.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 238638, "question_id": "XyaK8LQF2nR4MkStqVDco3", "question": "What most likely just happened to the ball in this image?", "choices": ["was missed", "was hit", "is floating", "was pitched"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000238638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171183, "question_id": "XyvmReRPYRnBgNkZmzriPG", "question": "What is in the cup all the way to the left of the counter?", "choices": ["juice", "toothbrush", "water", "candy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171183.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 72247, "question_id": "Xyw4cje7N8VizXzYCwyccP", "question": "What can be said about the man's head?", "choices": ["submerged", "tan", "tattooed", "bald"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000072247.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86838, "question_id": "XzVrt2iLfoLuZSjNKodjQQ", "question": "What is next to the pumpkin pie?", "choices": ["fork", "egg", "spoon", "cream cheese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086838.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209696, "question_id": "XzgTe6BK677jmatBXsBbtq", "question": "In which facility does this person rest?", "choices": ["hospital", "office", "home", "prison"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209696.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 93252, "question_id": "Y2BSCndaMWyNLKrjDBjEvS", "question": "What is the object under the sink for?", "choices": ["fill water", "opens door", "carry lunch", "discard trash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000093252.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10835, "question_id": "Y2DRkxZisZikkNM3S7m2UV", "question": "What activity are these birds performing?", "choices": ["feeding", "perching", "flying", "singing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351879, "question_id": "Y48D7AxbbeauwbmHJKSzDs", "question": "What is on the bench?", "choices": ["human", "monkey", "dog", "boxes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351879.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400325, "question_id": "Y6ZVp22qaqTJCcjBu5xbTd", "question": "What facial hair does the man have?", "choices": ["beard", "sideburns", "mustache", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000400325.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20594, "question_id": "Y7QVZ8BbSLkasMLBh2YiHP", "question": "How many ears does the animal with the object in its mouth have?", "choices": ["three", "two", "one", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020594.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498893, "question_id": "Y7cuToKUwvBfBD3RAidrpc", "question": "What name can be spelled by the letters in white on the front of the train if they are rearranged?", "choices": ["mike", "bob", "jim", "danny"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498893.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281460, "question_id": "YA82Y9XFEQsaySzQPE6xUE", "question": "If the tree is bare what did these animals eat off of it?", "choices": ["bark", "leaves", "branches", "dirt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000281460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103523, "question_id": "YAN2MEwzXMnFHMuPo6NJDh", "question": "Which car will have the hardest time leaving this area?", "choices": ["red", "blue", "tan", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103523.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109985, "question_id": "YB2aSaCptgMpiokGsipnBz", "question": "What kind of property is this?", "choices": ["residential", "wild", "industrial", "commercial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109985.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 214052, "question_id": "YB8Mo3HrbNCLKiCsgZ7eov", "question": "In which country is the pink double-decker bus operating?", "choices": ["canada", "united states", "hong kong", "united kingdom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000214052.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 474993, "question_id": "YBFgrZoaQNhPpAyhWftb6G", "question": "What might provide eating shown here that will help you keep cleanest?", "choices": ["roots", "rock", "ground", "bench"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000474993.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105887, "question_id": "YBPjSqL3iUuLKSzTV7QZip", "question": "What is the zebra on the right doing?", "choices": ["interrupting others", "threatening others", "resting", "grazing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000105887.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230757, "question_id": "YBjCHAudf5wUCpgRrUSGir", "question": "The person's jacket looks similar to the outfit of what Peanuts character?", "choices": ["lucy", "peppermint pattie", "charlie brown", "schroeder"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230757.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486461, "question_id": "YCdTd3bEQ6oXX2idVbWcuf", "question": "What is the average length of the tusk of this animal?", "choices": ["2 feet", "6 feet", "10 feet", "4 feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486461.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71140, "question_id": "YDTWjGi3XUFHYYNfhMpBmc", "question": "What two types of tricks is the skateboarder doing at the same time?", "choices": ["grab stall", "grab spin", "spin air", "flip spin"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000071140.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102578, "question_id": "YDZ5CAgDREqFcJpKZuLbD5", "question": "What might the animal do soonest?", "choices": ["walk", "drink", "eat", "sleep"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000102578.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548321, "question_id": "YDuSgZkYHceSyXPFN3NJnC", "question": "Why is there a fence?", "choices": ["safety", "aesthetics", "prevent theft", "natural"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000548321.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188794, "question_id": "YEXevje2NXR5SBzM5oHjFn", "question": "What type of street is this?", "choices": ["roundabout", "toll road", "highway", "one way"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325714, "question_id": "YFnWxHfdRoPGRRGuYjRyLt", "question": "She is currently standing in what type of store?", "choices": ["clothing", "electronics", "grocery", "shoe"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000325714.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285735, "question_id": "YFnjLqB2o9Gcu7KDzSkfxh", "question": "What type of animal is this?", "choices": ["wild", "reptile", "aquatic", "domestic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200069, "question_id": "YFpHriqsYJrFprfrAXKEM4", "question": "What type of duck is this?", "choices": ["mandarin", "marbled", "mallard", "canvasback"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200069.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435947, "question_id": "YGPK3gZuNvb34TB5wGjTsE", "question": "What does a crescent moon on a tiny hut usually mean?", "choices": ["breaker", "hotel cabin", "bathroom", "shed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377452, "question_id": "YGa6WgSSQSaWuiHKpq7V5J", "question": "Where does the green cord likely lead to?", "choices": ["fence", "pole", "trailer", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146138, "question_id": "YGjprSDk3DuLsRcEzvaZgt", "question": "What type of building is shown?", "choices": ["shed", "skyscraper", "barn", "house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406780, "question_id": "YJqoEtureGPzgEzV372fMJ", "question": "The metal loop on the collar suggests the owner does what with the cat?", "choices": ["pets", "runs", "cages", "walks"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000406780.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76379, "question_id": "YKQjFYYME2WEtzxi5osZes", "question": "What is the scientific name for these types of flowers?", "choices": ["abronia", "astragalus", "helianthus", "lacaena"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076379.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92507, "question_id": "YLYzuFtbiaaVzbk3iU4h9H", "question": "Which is best cheese for pizza?", "choices": ["cheddar", "goat cheese", "mozzarella", "provolone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092507.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 220144, "question_id": "YMH7pvWC8PerXRwKoNJeHT", "question": "What is required for this activity?", "choices": ["ski", "skates", "board", "shoes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000220144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16790, "question_id": "YMnEPLXKy8wqnroi59gMBq", "question": "A group of these animals has the same name as a group of what other animals?", "choices": ["sheep", "fish", "crows", "seagulls"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000016790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187021, "question_id": "YNJreeQnz6cGu2pbwL7yqz", "question": "What is covered in white stuff?", "choices": ["pizza", "donut", "bench", "cake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000187021.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 154570, "question_id": "YNYAYANkngXcjEQpkEzR5a", "question": "How can he power this board?", "choices": ["wind", "gas", "battery", "sun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000154570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 398460, "question_id": "YNp99qczygPEfipDDfXTeJ", "question": "What part of his body will touch the ground next?", "choices": ["head", "feet", "buttocks", "stomach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000398460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9704, "question_id": "YPDVdKTp7AfCSthr3snMF2", "question": "What type of passengers is the bus carrying?", "choices": ["university students", "tourists", "medical patients", "workers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009704.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339756, "question_id": "YPtv96G7NqPFoWZK2HsLQd", "question": "What type of costume are these ties for?", "choices": ["spiderman", "clown", "ghost", "batman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339756.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 245783, "question_id": "YRRFF58WruDEcmBJcKcoTZ", "question": "What is required for this activity?", "choices": ["sun", "snow", "rain", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000245783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35332, "question_id": "YTvyr5tgHhV5SgrpKsnMFq", "question": "What is used to cover the area around railroad tracks?", "choices": ["ballast", "more steel", "pebbles", "cement"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035332.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533871, "question_id": "YTyhJiV6EobX22qzFB2WyR", "question": "What might the white bird be looking for in the water?", "choices": ["fish", "leaves", "rocks", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431065, "question_id": "YUgVV2sXcoCjntExWjWfmw", "question": "What topping is often found on this item?", "choices": ["ketchup", "pepperoni", "gummy bears", "almonds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 305065, "question_id": "YWDtARUnwaECHAJ8iYP4hY", "question": "What is sneaking in on the left?", "choices": ["head", "toe", "tail", "trunk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000305065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578760, "question_id": "YWHiDVXaPEf9zrDB325ZkP", "question": "What is the only European airline that carried more passengers than this airline in 2020?", "choices": ["ryanair", "british airways", "air france", "alitalia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173794, "question_id": "YWQRqF8fmEKNog7nbyF2aN", "question": "What might the person use next?", "choices": ["glue", "perfume", "mustard", "salt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000173794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29549, "question_id": "YWhZuTLiN4MwoGWaSUwQpU", "question": "What surface is behind the giraffes?", "choices": ["sand", "rocks", "marble", "concrete"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029549.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 370753, "question_id": "YWsbSmCCDZk5bUamoK7caH", "question": "What type of plants are near the giraffe?", "choices": ["cactus", "ivy", "flowers", "shrubs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000370753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303249, "question_id": "YXuThEZsCP3uLzJizST5d5", "question": "What is a problem in this area?", "choices": ["crime", "weather", "bad wi-fi", "children"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372195, "question_id": "YassYSAAL699wEzyASV2do", "question": "How does the person eating the soap like their bread?", "choices": ["soft", "wet", "basted", "toasted"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372195.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53278, "question_id": "YbEmoF7q6igYXmKuVuTMDP", "question": "What country is this truck based out of?", "choices": ["usa", "germany", "russia", "hungary"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053278.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447925, "question_id": "YcCsn8uTQhZZzg8suJAE69", "question": "What type of video game is the man playing?", "choices": ["puzzle", "racing simulator", "rpg", "shooter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 326835, "question_id": "YdHtCx6YsuhwncfnMYQDe5", "question": "What does the animal foot have?", "choices": ["gills", "talons", "hooves", "webs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000326835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444375, "question_id": "YgNg68xyHnyqdsEYkXSPGV", "question": "Upon which empire does this memorial stand?", "choices": ["british", "united states", "russian", "german"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 537809, "question_id": "YhDnocw8rkKbCQJF2DycmT", "question": "What type of transportation is shown?", "choices": ["road", "air", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000537809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500881, "question_id": "YmiufRhyaaoDRE8sSgt4H6", "question": "Why is the player wearing gloves?", "choices": ["warmth", "fashion", "grip", "health"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562625, "question_id": "Yqz5HcrEKuu24CTYBj3J9b", "question": "What item seems to be plugged in on the counter alongside the kitchen appliances?", "choices": ["scale", "radio", "timer", "mixer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562625.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207101, "question_id": "YrTUdRoxdbYVSLFYHegMD2", "question": "What activity are persons riding on this bus normally partaking in?", "choices": ["commuting", "sight seeing", "shopping", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207101.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120723, "question_id": "YrUKvesXXKeFRkSxbE9aVF", "question": "What is being recorded?", "choices": ["images", "words", "history", "voice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 285423, "question_id": "YrdNHzD4ut9SN6GY22zfmW", "question": "The bent street sign reveals that this street is most likely what?", "choices": ["straight", "blocked", "curved", "short"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000285423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24377, "question_id": "YsnJseZdac6KQg5wNif7A4", "question": "How many different types of collars does the dog have on?", "choices": ["two", "three", "five", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456795, "question_id": "YsoLfQDWZpvMAJGLQp2tED", "question": "What sort of odor would one smell if you were sitting here?", "choices": ["vanilla", "cinnamon", "roses", "tomato"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456795.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558691, "question_id": "YvNdkiEvx68iqag7CzjFpZ", "question": "Why are there no balls on the table?", "choices": ["roll away", "not needed", "stolen", "hidden"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000558691.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79137, "question_id": "Ywh3dZ3Z99eENAXpzBaV4K", "question": "What beverage is likely readily available for consumption in the kitchen?", "choices": ["beer", "wine", "tea", "coffee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079137.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507250, "question_id": "YyFHC3tnQpdv9QKvfSgmJc", "question": "What is in the container?", "choices": ["rocks", "cat hair", "milk", "sweets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507250.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 15308, "question_id": "Yyr5ywFkYuvFSecwiVyLCv", "question": "What is flying in the sky?", "choices": ["kite", "bird", "balloon", "airplane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000015308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296835, "question_id": "Z2ifKQkJeXzQ7FSuWHMzL3", "question": "What type of item is next to the book?", "choices": ["toothbrush", "dvd", "stereo", "toothpick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119193, "question_id": "Z32frBaQBac99QH4nqKJGv", "question": "The back of this persons coat displays what element?", "choices": ["stone", "water", "wind", "fire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119193.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27908, "question_id": "Z3NYYK2u2j4r6whC7xF9pL", "question": "What type of range is shown?", "choices": ["math", "mountain", "gun", "shooting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000027908.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183592, "question_id": "Z6umfMgbFRLjNXFAUV5HD3", "question": "What kind of edge does the scissors have?", "choices": ["narrow", "serrated", "grooved", "dull"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183592.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336826, "question_id": "Z6wr6J3yq3X79coWkhemYe", "question": "On what locations do you mostly find this kind of sport?", "choices": ["plane land", "ocean", "mountain tops", "desert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336826.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301181, "question_id": "Z9ZitwCw8xdw69zqfR2nxz", "question": "What device is most likely used to play the music discs on the shelf?", "choices": ["walk man", "cd player", "vcr", "mp3 player"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207417, "question_id": "Z9eFRyZvcA6Njx8a5aoCsz", "question": "What is keeping the animal in this dirt covered area?", "choices": ["water", "fence", "predators", "wall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207417.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436655, "question_id": "ZA6bLpFgbsxPLkztZ6LNCN", "question": "What might the bus be stopped on the curb to pick up?", "choices": ["food", "packages", "passengers", "dogs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000436655.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 217277, "question_id": "ZALKGGbh9e8jLeQrMTmspH", "question": "Where does the door by the shelves lead to?", "choices": ["kitchen", "bedroom", "garage", "outside"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000217277.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 199068, "question_id": "ZG4u8RKqiiYjR5Jz5agstG", "question": "What is the donut sitting on?", "choices": ["bench", "table", "floor", "counter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000199068.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432845, "question_id": "ZHyXsmrCknxUoZTH3Evx2c", "question": "What object is the surfboard shaped like?", "choices": ["snowboard", "ski", "skateboard", "canoe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432845.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486569, "question_id": "ZJqeHSwfv229sDiZvyhKYS", "question": "What is a famous park associated with this animal?", "choices": ["yellowstone", "central", "hyde", "clumber"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486569.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221263, "question_id": "ZKR5PY8RxDXG5C2XMH9VCz", "question": "What are they doing?", "choices": ["resting", "fighting", "feeding", "mating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 531794, "question_id": "ZMbYWhigKtyayTGQ2UfNFA", "question": "What is this guy doing?", "choices": ["swimming", "sleeping", "diving", "jumping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000531794.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209854, "question_id": "ZNLKmZYGTEDAnEESiANdzx", "question": "How many unused items shown here?", "choices": ["six", "three", "none", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209854.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384969, "question_id": "ZQs5MAeGV5kf4dnCDWfMZZ", "question": "The item next to the cat on the laptop is for someone with poor what?", "choices": ["credit", "hearing", "eyesight", "mobility"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384969.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 239265, "question_id": "ZRMRtNZTLBCQHMsovE3C6M", "question": "What type of metal is the sink faucet made out of?", "choices": ["titanium", "gold", "silver", "copper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000239265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 14358, "question_id": "ZRU7Rv2K8J9aJS6wCzzaB2", "question": "What is permitted to the right of the dotted white line on the road?", "choices": ["crossing", "u turns", "racing", "bus stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000014358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 359668, "question_id": "ZSxrRrjscfBNPD9ArvwXfC", "question": "What type of pie is seen?", "choices": ["blueberry", "apple", "mince", "pumpkin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000359668.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 211606, "question_id": "ZVxwbHVuuyjk5Y8coMCQfd", "question": "These animals would like to get access to what nearby thing here?", "choices": ["cats", "trees", "rocks", "dogs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000211606.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 561906, "question_id": "Zar6YBTwrFsyuJw8Gk8pB3", "question": "What is the white item called?", "choices": ["egg", "milk", "shell", "frosting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000561906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264750, "question_id": "Zb5Brzoxpmd7CgDzPceMMD", "question": "Why is the bear on his hind legs?", "choices": ["see person", "threatening person", "eating", "balance"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000264750.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251099, "question_id": "ZbzyEc9P6Xw5yPVDMJ8v87", "question": "The man has what above his nose?", "choices": ["lemur", "scar", "glasses", "banana"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251099.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 119697, "question_id": "ZffjCdL4YwyRezn5MGxfHC", "question": "What's the black object you sit on when riding the white horse called?", "choices": ["sitback", "tack", "saddle", "bench"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000119697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90212, "question_id": "Zh88PNkHcYFW4TQDUSwHPV", "question": "What does he hope will be in the net by the end of the day?", "choices": ["ball", "money", "boot", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090212.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90607, "question_id": "Zi7BZhL5ViEzHtNNCgWi5c", "question": "What is missing from the bed setup that is normally used with mattresses?", "choices": ["pillow", "frame", "box spring", "sheets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090607.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 216635, "question_id": "Zj5rFKLDFrH8ubFbAKskYw", "question": "What does the woman look to be?", "choices": ["asleep", "reading", "mad", "drunk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000216635.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298925, "question_id": "ZkPQz4yB6MhbHmNFF8esHE", "question": "What is the most romantic item shown here?", "choices": ["bowls", "pink flower", "cards", "white flower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475452, "question_id": "ZmZPQubief7Cv6mnsq4Y3S", "question": "Who is the manufacturer of this bicycle?", "choices": ["schwinn", "giant", "peugeot", "trek"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000475452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412840, "question_id": "Zo62CMcvbSH3xuvajhijKf", "question": "What item would clean this floor the best?", "choices": ["pressure washer", "grease", "vacuum", "mop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412840.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71827, "question_id": "Zo9r94vmAeTLASCavjBdm4", "question": "Which part of the air craft called running shoes?", "choices": ["fin", "wing", "wheel", "engine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071827.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330992, "question_id": "ZoV5ABzcrgXq6yGtxmno7c", "question": "What kind of position is the surfer engaged in?", "choices": ["kneeling", "standing", "squatting", "sitting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330992.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 248606, "question_id": "ZocCb3afjQkBxJQXUbqzot", "question": "How many wheels are there likely to be that can't be seen?", "choices": ["two", "four", "three", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000248606.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 354301, "question_id": "ZpqtVWoEieKwiiyo3n5u6n", "question": "Besides sugar what other powder was probably used in the icing?", "choices": ["cinnamon", "vanilla", "carob", "cocoa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000354301.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 28724, "question_id": "ZqDmQTcvWeVncmzVhWVoYz", "question": "What sport are the men playing?", "choices": ["tennis", "disc golf", "cricket", "water polo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000028724.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109618, "question_id": "ZqcQFWjJ9pznrSXVFWa5Zv", "question": "The cat is near what?", "choices": ["shoe", "computer", "magazine", "window"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109618.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296723, "question_id": "Zr7bXrnijJmtSyDmZikdC8", "question": "What are the animals doing?", "choices": ["dragging", "cuddling", "playing", "fighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 395783, "question_id": "a3fWE2hcm36thXADUNmoxM", "question": "A male of what type of animal shares the same name as the first four letters of the sign?", "choices": ["pig", "cat", "dog", "deer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000395783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189783, "question_id": "a4MsKnDRBf9VRRMmTJdHW8", "question": "What would be the most logical explanation for the toilet being where it is?", "choices": ["displacement", "storage", "art", "decoy"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000189783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21781, "question_id": "a5DofKWbadoL74mgknmYua", "question": "What thing does this device need to allow people to park their vehicles there?", "choices": ["cats", "ticket", "money", "oil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291398, "question_id": "a7BMSYbbEcBAMuoTUYBE3F", "question": "How many different colors of paper strips are there?", "choices": ["four", "seven", "five", "six"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000291398.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 502273, "question_id": "a99sbcMoGDqgwnHp67xNa2", "question": "What material are these vases made of?", "choices": ["glass", "clay", "stone", "marble"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000502273.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 73785, "question_id": "a9m525DbWxYfLCCpVzj29R", "question": "What do these meters measure?", "choices": ["syllables", "weight", "tempterature", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000073785.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308408, "question_id": "aBkwrtsgp3bEHtW94CqvsG", "question": "What is the name for the version of this that is meant for sunny days?", "choices": ["bucket", "lamp", "parasol", "rake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308408.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412766, "question_id": "aBqr4gTNuwuwWKZWQdwoeP", "question": "Why is everything tinted green?", "choices": ["paint", "green sunlight", "green flames", "camera filter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412766.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 72665, "question_id": "aCciDPDsDVgpPkFL8eHnyn", "question": "What type power source does this vehicle rely upon?", "choices": ["gas", "coal", "none", "electric"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000072665.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453364, "question_id": "aChcjx4do8HEfD5Tzwk2EZ", "question": "If this is the entire meal the person will feel what soon?", "choices": ["hunger", "jealousy", "sore", "thirst"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 301103, "question_id": "aF5Argxw6gPe2FpGMk2o7K", "question": "What is behind the plane in the background?", "choices": ["flags", "tennis balls", "soldiers", "arrows"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000301103.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29549, "question_id": "aGfKb85t4XCgm4JjcAQiXk", "question": "If you saw these animals in the wild what continent would you be on?", "choices": ["europe", "south america", "north america", "africa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029549.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 503231, "question_id": "aHZUsLhEGkTr7qWyVWibV5", "question": "What type of vehicle is the person riding?", "choices": ["bike", "moped", "scooter", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000503231.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446676, "question_id": "aJYMFR6QW8SDy3aCWCiR4G", "question": "How much does it cost to make a call to this number?", "choices": ["$4", "$1", "0$", "$.50"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446676.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385278, "question_id": "aMoDcWEoSNhVATfgzGbCfa", "question": "What material was used to create this heart on the sign?", "choices": ["chalk", "spraypaint", "watercolor paint", "oil pastel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385278.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298532, "question_id": "aNxHjhWQJWHrXAed7vwPT7", "question": "What is this accessory used for?", "choices": ["calling", "typing", "listening", "watching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298532.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420853, "question_id": "aPkXySAgjZVksSWYynobDM", "question": "Which part of the plants will these animals eat?", "choices": ["branches", "trunk", "roots", "leaves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179582, "question_id": "aPyQH9YPAsDWU7FoXtM2RM", "question": "Why is he wearing a suit?", "choices": ["uniform", "dress code", "costume", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577010, "question_id": "aQ3y8XzU5K6HtugSSeTEna", "question": "The texture of the the tennis court is?", "choices": ["sand", "asphalt", "cement", "soil"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000577010.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175002, "question_id": "aQ9aA94aHvxGz7qkdnGK7S", "question": "What is the giraffe smelling?", "choices": ["flowers", "food", "another animal", "plants"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175002.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 559942, "question_id": "aRN4n6QMKpLc8m9AsiPwNG", "question": "Who manufactured the bus?", "choices": ["acura", "volvo", "chevrolet", "mercedes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000559942.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52119, "question_id": "aTXKTvC3jhXyzLAeSEYGNB", "question": "What does the person have on?", "choices": ["gloves", "clown nose", "castanets", "basket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000052119.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11853, "question_id": "aVSEjKjeXns9Ms5ueCdSg7", "question": "What kind of parking is available?", "choices": ["valet", "lot", "diagonal", "street"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000011853.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517904, "question_id": "aWHJg6GLxgJqZx5tDQKw8x", "question": "Why does the CGI woman on the surf board have no visible feet?", "choices": ["clipping error", "model issue", "design choice", "rendering issue"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000517904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161892, "question_id": "aWVP3B4wX47GnDnXohRYqh", "question": "What will the person who lives here do before closing the refrigerator?", "choices": ["toss rat", "nothing", "add kitten", "remove cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161892.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347028, "question_id": "aXu9GPiYPgB9Pp9FPA85TF", "question": "What is the animal doing?", "choices": ["crying", "eating", "looking", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347028.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521462, "question_id": "aYmhUAGQEu3AG84iCqQmhE", "question": "What activity is the person with the helmet doing?", "choices": ["piloting", "motorcycling", "biking", "boating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521462.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 404786, "question_id": "aZTCxgWwLPtCjKKX7TgQ8B", "question": "Where is this hat usually found?", "choices": ["circus", "concert", "rice fields", "baseball game"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000404786.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576090, "question_id": "aaogydcEMd5N5aq23rz2d7", "question": "What could someone do when on top of the white tower?", "choices": ["grind", "blast off", "water ski", "observe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576090.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189601, "question_id": "aarTfTapBVLKNmAioCotaR", "question": "What type of area is shown?", "choices": ["desert", "seaside", "urban", "rural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 437147, "question_id": "acUq4Z2FtuSDsH83Y3y8H6", "question": "What is another game that is played with a racket and ball?", "choices": ["badminton", "golf", "baseball", "squash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000437147.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449680, "question_id": "afNYpZZXYC6zwgUS65ArTP", "question": "What is lacking in this bathroom area?", "choices": ["technology", "people", "privacy", "toilets"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449680.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269966, "question_id": "afdo6aJ8xnDdgrDobBiJgo", "question": "The bird here prefers which environment?", "choices": ["mountains", "trees", "water", "arid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269966.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578834, "question_id": "ahtoYrTRkA3XyyzBdKY9ki", "question": "Why is the player in this stance?", "choices": ["passion", "anger", "injury", "anticipation"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000578834.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 496163, "question_id": "am6FTvsFKsXrTBLwDJHwU7", "question": "Who is this zebra with?", "choices": ["gazelles", "giraffes", "elephants", "no one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000496163.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269801, "question_id": "amUBwosMS2Y9GX6wFJpFpN", "question": "What would a human use in the place of this item?", "choices": ["brush", "spade", "fork", "knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269801.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191971, "question_id": "an8ZBvN57hDGkcF5K9jfZE", "question": "What meal time is closest to the time illustrated here?", "choices": ["breakfast", "dinner", "lunch", "midnight snack"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191971.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142713, "question_id": "anNY2wmfiZcNbwasW7ewdj", "question": "Which South American country's flag is shown?", "choices": ["brazil", "guyana", "chile", "argentina"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142713.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532365, "question_id": "anurWUAPYEFomEP9hreLok", "question": "Which kind of toilet is designed sit like squat?", "choices": ["eastern", "western", "northern", "southern"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000532365.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410128, "question_id": "ao4ESbwwpb8jcq76VaXusM", "question": "What are the fluffy things made of?", "choices": ["cotton", "smoke", "water", "gas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68930, "question_id": "aoFiXyMBVtLoZDSR36UKmg", "question": "In order to be food safe this kitchen would need a good what?", "choices": ["wallpapering", "expanding", "remodeling", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068930.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376928, "question_id": "appfKmd7QSFRbYEXk2SKqw", "question": "What is the material of the walls?", "choices": ["drywall", "plaster", "wood", "porcelain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376928.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90416, "question_id": "asP2WeN8KGNWMY8NEHoKZd", "question": "What is the are the yellow protruding parts on the vegetation above called?", "choices": ["leaves", "stem", "roots", "flowers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090416.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 255768, "question_id": "asgiZuDvSWJVD6CLAGjMT7", "question": "What is he standing on in all likelihood?", "choices": ["bodyboard", "jet-skis", "surfboard", "floatie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000255768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 87841, "question_id": "atHbsuuLhyXGygMAkWMWhA", "question": "What is this particular building called?", "choices": ["nuisance", "outhouse", "convenience", "sanitary"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000087841.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458916, "question_id": "atQSC6gLoYStMSCP9hywZP", "question": "What is the same color of these items?", "choices": ["salad", "lemon", "blueberry", "pumpkin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458916.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34150, "question_id": "auPoncHSqWdbZpkKookGkF", "question": "What action is the woman with the hat on doing?", "choices": ["hitting", "sleeping", "sitting", "eating"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000034150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292218, "question_id": "auw95iCcxxcXbK4kaSBbty", "question": "What surface is the boy playing tennis on?", "choices": ["outdoor hard", "grass", "clay", "indoor hard"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000292218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519817, "question_id": "avNTxnYsiQ3xjjMSAopapy", "question": "What is he doing?", "choices": ["stealing board", "cleaning board", "repairing board", "preparing board"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000519817.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8523, "question_id": "awVYQRzNAf8N2dRKK5aQyN", "question": "What letter is most likely missing from the word?", "choices": ["t", "r", "s", "n"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000008523.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532364, "question_id": "ayT4vAZvw7Kr65zBbwVPfU", "question": "What winter activity is the person performing?", "choices": ["snowboarding", "ice skating", "skiing", "ice hockey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103950, "question_id": "ayU7PwUuhyVEhG4yb8HvKG", "question": "What might these animals use their long beaks for?", "choices": ["fighting", "sipping soda", "fishing", "whistling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103950.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34728, "question_id": "b3Rr9v2Gmof5rfMaadrTkJ", "question": "What brand is the pink top?", "choices": ["adidas", "addis", "nike", "puma"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000034728.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 68727, "question_id": "b3Vkedn5Gr62NJaiGBEx7A", "question": "What kind of names are most probably printed onto their boards?", "choices": ["schools", "superheroes", "family", "brands"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000068727.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156837, "question_id": "b4qC7V5kampj3BKU2BxmLh", "question": "What is the name of the equipment above?", "choices": ["safety pin", "tap", "log", "fire hydrant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368664, "question_id": "b5Q522enAjPi3DTEvAbifE", "question": "What's the most likely reason the man is wearing glasses?", "choices": ["poor vision", "talent show", "costume party", "showing off"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368101, "question_id": "b5vCj4hHyW2fzJWN6koSPN", "question": "How does it likely feel outside?", "choices": ["hot", "cool", "warm", "freezing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000368101.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371303, "question_id": "b7rxj9aespSEaoPJwMAHgP", "question": "Which bench is the color of a watermelon?", "choices": ["neither", "left", "right", "both"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525800, "question_id": "b8jzomjauihgwoMPtKEN2H", "question": "What brand is on his board?", "choices": ["volcom", "roxy", "quicksilver", "billabong"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000525800.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345888, "question_id": "b8tXGPJzKTCXvGoUyfGDSf", "question": "What is needed to complete the system shown here?", "choices": ["television", "camera", "microwave", "monitor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345888.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90916, "question_id": "b8udaKgQnrpccE3Wfa6UW5", "question": "What is this vehicle carrying?", "choices": ["cattle", "cars", "coal", "passengers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090916.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 412179, "question_id": "b9NtAxH8dD3JKbvQuAibzG", "question": "What is needed for this activity?", "choices": ["sun", "snow", "waves", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000412179.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 347010, "question_id": "b9rnBEL7qmHfKgczkf3GW5", "question": "What is this type of bed called?", "choices": ["platform", "daybed", "canopy", "trundle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000347010.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512235, "question_id": "bAgksH77DCxHycv2JcWCZ7", "question": "What might one find inside this animal?", "choices": ["stuffing", "glass", "weed", "money"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512235.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 210154, "question_id": "bAktVZJ8XsanEHnp7zKSBS", "question": "What is the capital city of the photographer's country?", "choices": ["rome", "warsaw", "paris", "riga"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000210154.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83314, "question_id": "bB7oSMZKfSvBqVHSgTd43i", "question": "What is on the cat's tongue?", "choices": ["fur", "water", "whiskers", "cup"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083314.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456026, "question_id": "bCLFyYXcWXW4ueSbroExnu", "question": "From which city is his cycling club from?", "choices": ["ajax", "toronto", "brampton", "hamilton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456026.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 357309, "question_id": "bDbGLBkjNvN7x9PqeW3sQ6", "question": "From where is the person taking this picture?", "choices": ["bar", "inside uber", "airport", "bathroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000357309.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 292784, "question_id": "bDkBUWb4v5XNiMd5CwUFX7", "question": "What would you feed these animals?", "choices": ["bugs", "hay", "eggs", "fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000292784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127017, "question_id": "bGrjsRUEJzaZRJfgm3iDem", "question": "What is the snowboarder trying to practice?", "choices": ["painting", "tricks", "repairing", "shouting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127017.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293444, "question_id": "bHANAouZ2ytnxc96NVWRtS", "question": "What is the lowest traffic light conveying to people?", "choices": ["walk now", "no bicycles", "no crossing", "don't walk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000293444.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578760, "question_id": "bHYtX6xBMMMDuD7z68mUPo", "question": "Which one of these cities does this airline have a hub in?", "choices": ["munich", "caracas", "des moines", "montego bay"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000578760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407827, "question_id": "bHZzd8tydNGFkThL5LLy7Q", "question": "What feature does this animal have?", "choices": ["pouch", "horns", "stinger", "trunk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000407827.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546809, "question_id": "bHiWW9R5NZrNUqPYddPHzc", "question": "What is the dog laying on top of?", "choices": ["bed", "hammock", "porch", "couch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546809.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182590, "question_id": "bHpkaC3dFNcG4bR2c8ngt8", "question": "What is the lifespan of elephant?", "choices": ["90 years", "65 years", "100 years", "56 years"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000182590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428901, "question_id": "bJcHWiheF4cZ3YzPA2cdZW", "question": "What type of pipe is shown?", "choices": ["crack", "tobacco", "marijuana", "plumbing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000428901.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20228, "question_id": "bKkWELRdCGw5pfCq2PSDLi", "question": "What would it travel in on a plane instead of this suitcase?", "choices": ["crate", "car seat", "stroller", "seat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020228.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54239, "question_id": "bKmKtygiHidMdFcUCgdm7j", "question": "What can this treatment remove?", "choices": ["mucus", "odor", "acne", "hair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054239.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169067, "question_id": "bLEto2gV8CXCUge35qBR85", "question": "What are the small mirrors on either side of the truck called?", "choices": ["side", "rear view", "adjustable", "back"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 239727, "question_id": "bRV5Gi3JZ94FAdSYSpedxR", "question": "What food comes from the country this plane is from?", "choices": ["souvlaki", "pizza", "hamburger", "bulgogi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000239727.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 375845, "question_id": "bSowETZDLHV4BvxsrvgEUN", "question": "What type of furniture is this cat laying on?", "choices": ["bookcase", "chair", "sofa", "table"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000375845.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424371, "question_id": "bTU4fDPgvomj8vDFNbAFaU", "question": "What type meeting might the cowboy hatted person be attending?", "choices": ["convention", "trial", "funeral", "witness protection"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424371.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 538648, "question_id": "bVjpEGam2PKrVWxpSzFMes", "question": "What is the airplane specialized to land on?", "choices": ["rocks", "snow", "water", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000538648.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409112, "question_id": "bWgCED6uQmycNAUerKKitY", "question": "What is contained inside the white cube seen here?", "choices": ["rollodex", "stereo", "light", "ice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409112.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178981, "question_id": "bYujoVdUrcMxGdMi5gXBFs", "question": "What makes the animal above thrive in cold conditions?", "choices": ["dormancy", "none", "thick fur", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178981.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270675, "question_id": "bbu9FfyH34xbCnyrXWjXhQ", "question": "Why is he holding the plate?", "choices": ["hiding it", "for master", "stuck", "stole it"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000270675.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277574, "question_id": "bcZ8oSp2Chc35JbkGpyKNC", "question": "What is the smallest animal visible in this image?", "choices": ["chimpanzee", "elephant", "zebra", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277574.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341664, "question_id": "bderZFkAmTL5wv2TpvBToQ", "question": "What is the man in black trying to jump onto?", "choices": ["raft", "surfboard", "kayak", "cardboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000341664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350168, "question_id": "bdiHfA2Ft6LydoDZF6Rrzz", "question": "What type of energy goes through the pole behind the sign?", "choices": ["wind", "nuclear", "thermal", "electrical"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350168.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373213, "question_id": "besUnG9XeokpPj3BNEjJ5R", "question": "In which country is this most likely?", "choices": ["france", "england", "spain", "germany"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373213.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136353, "question_id": "bfdPz2YdEXHQgCxZTA7SBo", "question": "What is the tool above made of?", "choices": ["none", "fiber", "plastic", "metal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136353.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180, "question_id": "bh4umz9uAZmSY2URM9ETzR", "question": "What is the bear looking for?", "choices": ["cub", "berries", "fish", "nuts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000000180.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81979, "question_id": "bh9Qy9gaeAuGhPM48uxXEz", "question": "What is the dog doing?", "choices": ["resting", "playing", "chasing", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081979.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 303070, "question_id": "birokNJjeUQwXr4wKxi9zr", "question": "The athletes at his school go by what nickname?", "choices": ["huskies", "bears", "moose", "ravens"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000303070.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121880, "question_id": "bjP6hwFzuTm4gkXwaBs6RH", "question": "What type conveyance is visible here?", "choices": ["boat", "bus", "none", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59645, "question_id": "bkMHFPYLjD7GAid4t7dGUk", "question": "What would the device on the left be used with?", "choices": ["shoe laces", "computer", "stove", "bike tire"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059645.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466240, "question_id": "bkf6g6dn2wyieMkvjS6Jpn", "question": "What kind of bathroom is it?", "choices": ["plane", "restaurant", "home", "school"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000466240.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71922, "question_id": "bkpSB3JCc3rtREV5Ruxq9y", "question": "What type of animal is printed on the bus?", "choices": ["lion", "bear", "zebra", "tiger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071922.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506244, "question_id": "boST8RpLPhenw36yoTRP5h", "question": "How many pieces of food can a monkey hold at one time?", "choices": ["four", "two", "one", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000506244.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264192, "question_id": "bpFKe75xWPk6VfBNSD4bcz", "question": "What type of cloth is the person above wearing?", "choices": ["casual", "swimming costumes", "official", "semi-casual"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000264192.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552882, "question_id": "bpaqT9zBArMLK7PFtKq3hs", "question": "What is the zebra doing?", "choices": ["sleeping", "eating", "jumping", "scratching"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552882.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422904, "question_id": "bphTsPfS4yGZuDFVdgAXKu", "question": "What water sport is the man engaged in?", "choices": ["free diving", "kayak surfing", "fly boarding", "body boarding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000422904.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10636, "question_id": "bqofRU2o3X3mxGmmxcyjub", "question": "What is needed to fill the object the people are holding?", "choices": ["air", "solids", "rocks", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010636.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 220513, "question_id": "brLQvTYFBxTBLfxS5U3vi6", "question": "What color are the bathroom walls?", "choices": ["light blue", "canary", "beige", "magenta"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000220513.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65401, "question_id": "brocUcF2ndceuNVLRM8FzZ", "question": "What usually goes in this machine?", "choices": ["meat", "candy", "change", "eggs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065401.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65133, "question_id": "brwt9Txf7a3xy97bkoX5yz", "question": "What left the tracks on the dirt road?", "choices": ["helicopter", "zebras", "bicycle", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065133.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453671, "question_id": "bt2Ypvntzh3Vagag56Ysg4", "question": "What vehicle is the device on the left used for?", "choices": ["busses", "bikes", "cars", "skateboards"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000453671.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161593, "question_id": "bveLKmx47mEb3XfSyiwi3w", "question": "What is stored inside the white object the cat is on?", "choices": ["books", "clothing", "tools", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161593.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95074, "question_id": "bw4rSYwKUUJhuvCbLCtNVc", "question": "If you needed to remove the water that is collected inside this toilet where is the button located on it to do this?", "choices": ["side", "bowl lid", "bottom", "tank top"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095074.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557456, "question_id": "bwjUrR7yhUG7dxRRASBZoS", "question": "Which one of these newer payment types is accepted on their website?", "choices": ["stripe", "bitcoin", "paypal", "dogecoin"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000557456.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498195, "question_id": "byJm6oft7XHdXHMRXyC44U", "question": "What is the type of material covering the tree?", "choices": ["latex", "chain mail", "mesh", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498195.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100265, "question_id": "byZFWnhUUcc4HrBcMwcYU7", "question": "What is the woman holding?", "choices": ["skates", "skiis", "snowboard", "hockey sticks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 120861, "question_id": "bzcu6qX2nQxGKEgPXe6MQe", "question": "What are the unusual words on the sign written in?", "choices": ["sharpie", "crayon", "pear juice", "blood"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000120861.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379459, "question_id": "bzsqxuANYzQdS5tC5dQLVV", "question": "What video game franchise is the symbol on the boys green shirt from?", "choices": ["mario", "minecraft", "sonic", "zelda"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309357, "question_id": "c38p5CaCztL7YX4SahtLm6", "question": "What can be used to mold the vessels above?", "choices": ["loam", "clay", "sand", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309357.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 267626, "question_id": "c3JPPwdHF4eyngj9E7XXnz", "question": "What is another name for the suitcases on the sidewalk?", "choices": ["stuff", "baggies", "luggage", "utensils"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000267626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54239, "question_id": "c3qjZzj5nySZxTiHUbeHEd", "question": "What does the sinus rinse need to be mixed with?", "choices": ["mineral water", "distilled water", "tap water", "sparkling water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054239.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107336, "question_id": "c4YAKjhctBoiAJp93Ztv5n", "question": "What do the insects on the shower curtain start out as?", "choices": ["moths", "beetles", "spiders", "caterpillars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107336.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443378, "question_id": "c5Jje8ufvR2ycuKtWJLJUw", "question": "This picture was likely snapped by someone named what?", "choices": ["sean", "dana", "marcus", "kyle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443378.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 140509, "question_id": "c7qCWCRSh6jJ68FHgyAKJk", "question": "What is the liquid on the bottom of the box?", "choices": ["spilt beer", "rainwater", "melted butter", "pan grease"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000140509.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358533, "question_id": "c7xdKhTetCVYJni8pWb2aS", "question": "Where were these items before they were picked?", "choices": ["snow", "sky", "dirt", "ocean"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 346170, "question_id": "c8V25P2LpnApYSDD2mYnVZ", "question": "What kind of bathroom is it?", "choices": ["commercial", "plane", "school", "home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000346170.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10485, "question_id": "c9MsGvWQNkD5zMLTd84oDv", "question": "What is the side dish called?", "choices": ["french fries", "hash browns", "bacon", "eggs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010485.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379963, "question_id": "cAztgCFavrdcbQfaWqZo7j", "question": "What holds the toppings on the food?", "choices": ["cheese", "honey", "toothpick", "syrup"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105944, "question_id": "cBG2wmSbZ33xFqMMiZmPA2", "question": "What is the skateboarder setting up to do?", "choices": ["jump", "crash", "duck", "trick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105944.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 547329, "question_id": "cCbsL6ho6k55hPvMJaf27t", "question": "The bed shown here resembles what most?", "choices": ["log", "car", "sleigh", "coffin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000547329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230579, "question_id": "cCwkvCf25Upwic76Zw5SAo", "question": "What is the man doing with his hands?", "choices": ["peace sign", "thumbs up", "finger guns", "high five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24392, "question_id": "cDKaLSYL7AQyFqE9nauiHL", "question": "What passion does the person who controls this space have?", "choices": ["sales", "emgamd", "tuesdays", "oranges"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000024392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177333, "question_id": "cGqbTyrTDHxne4K8jdPvbn", "question": "Why is this apartment empty of all furniture?", "choices": ["sale listing", "poverty", "fire damage", "minimalism"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000177333.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 65439, "question_id": "cJNs5rjWspARrTpM6gPCMo", "question": "What is the person using to hold the donut?", "choices": ["paper towel", "butcher paper", "tissue paper", "napkin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000065439.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 331792, "question_id": "cKXyTCKdZWNtkWEVArY4wf", "question": "What century was the vehicle depicted invented in?", "choices": ["twentieth", "tenth", "twenty first", "nineteenth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000331792.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304940, "question_id": "cLaeD3QnR8GtzUvLoUNo2A", "question": "Zebras have which color stripes?", "choices": ["brown", "grey", "white", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304940.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 1118, "question_id": "cLgppdTdLusCxgad9RF388", "question": "What is usually found on the item the woman is eating?", "choices": ["hamburgers", "carrots", "ketchup", "beans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000001118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265247, "question_id": "cLwk4kJ89oENfBv8HxVUQt", "question": "How is the string strapped to the leg of the surfer called?", "choices": ["life rope", "safe string", "leg rope", "surf rope"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265247.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 523592, "question_id": "cM22TqzsGiF7f5wLdDHo5e", "question": "How many people currently share and utilize this bathroom?", "choices": ["ten", "12", "none", "11"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000523592.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 407258, "question_id": "cMHjWk8Dod9nvKhmAuY8HU", "question": "What matches the colors of the curtains?", "choices": ["cheetah", "goldfish", "zebra", "robin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000407258.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 34323, "question_id": "cMVqm6FBj74QvNkU6MvnUe", "question": "What countries flag is flying in the middle of the silver poles?", "choices": ["united states", "germany", "france", "united kingdom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000034323.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86211, "question_id": "cMiTirrEupA5nS7ZhSFvQY", "question": "What will the structure over the stove suck up?", "choices": ["fire", "smoke", "grease", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259363, "question_id": "cPenZJV74TrbsxxkRBeg9d", "question": "What is definitely higher in elevation than the kite?", "choices": ["people", "house", "trees", "crane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259363.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439211, "question_id": "cPunjcvf2iPm2DvzoYLd3D", "question": "Who will the red tag around the dog's neck help locate?", "choices": ["owner", "neighbor", "trainer", "vet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 280366, "question_id": "cQQeHTNAsx5phMWgMVMAgL", "question": "Where are persons directing air traffic located here?", "choices": ["airline gate", "in cafe", "tower", "on plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000280366.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209673, "question_id": "cRTokHQPbTDwmTBQZULGMF", "question": "How many species likely share this bed?", "choices": ["one", "none", "three", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209673.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276726, "question_id": "cRWXEhsBJjPqG2DEc7MYpw", "question": "What type of phone is he using?", "choices": ["landline", "cellular", "rotary", "corded"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000276726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338084, "question_id": "cRdXA6b9gEr3S8BdZqezhL", "question": "Why is his face covered?", "choices": ["hiding", "keep warm", "disguised", "stay dry"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000338084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45196, "question_id": "cRxuJMeK9Z2xrxSrNiYLa6", "question": "What has a handle here?", "choices": ["television", "pan", "baton", "sword"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000045196.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196425, "question_id": "cSKnxdERwFfbWXhqYToi4C", "question": "What part of the country is this dog in?", "choices": ["mountaintop", "coastline", "city", "farmland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196425.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563198, "question_id": "cShEWDjQDyeKmPFirWCUne", "question": "What were the initials of the founder of the shirt company?", "choices": ["sn", "ml", "ad", "rp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563198.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86977, "question_id": "cTGVqrdd7HLtdQSNCPwWvy", "question": "What might the bike rider hold in their hand while this picture is being taken?", "choices": ["rock", "grass", "camera", "clothes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351687, "question_id": "cTsd2LfkDypygbf7xr5u8o", "question": "What information is provided from the hanging object?", "choices": ["location", "temperature", "time", "brand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351687.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 58081, "question_id": "cWJUtvDxPwoXtYfRvU6oBP", "question": "The ads shown in the image is for what?", "choices": ["tires", "a/c", "door", "fuel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000058081.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57486, "question_id": "cXWZTX8EFNJLpT3ew5bC7m", "question": "How is the pizza being illuminated?", "choices": ["moonlight", "candlelight", "electric light", "daylight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000057486.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193066, "question_id": "cYiF8gvdo9DDmAkAk6hQ99", "question": "What stops this kite from being lost?", "choices": ["insurance", "drone", "nothing", "string"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193066.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 110813, "question_id": "cZKvMzq6KHRShn3RLcTgzZ", "question": "How many airplanes are in this airline's fleet?", "choices": ["14", "30", "ten", "25"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000110813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168277, "question_id": "cZuTorg6dcLprpfYYZkyPu", "question": "Where is this cat located?", "choices": ["home", "vet", "zoo", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168277.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515016, "question_id": "cZumQTLVKyMN8c5HtQ6Taw", "question": "What is the best frisbee dog breed?", "choices": ["bulldog", "shepherd", "labrador", "poodles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515016.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478343, "question_id": "cagdaLejTUiHXG8vz7Gi9w", "question": "What language is on the yellow sign under the Stop sign?", "choices": ["japanese", "mandarin", "greek", "russian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304103, "question_id": "cayQWeh53Ufr32W9boF5Yp", "question": "What time signature is represented on the side of the bus?", "choices": ["4/4", "4/5", "2/2", "ten"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304103.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 2271, "question_id": "ccG57KmJxaMw8nFivjBLvq", "question": "What does this bus primarily run on?", "choices": ["electricity", "gas", "natural gas", "diesel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000002271.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 527343, "question_id": "cepU92jxgeVay2GebtmtnS", "question": "What kind of numbering is written on the clock above?", "choices": ["english", "latin", "alphabet", "roman"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000527343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472674, "question_id": "cfwMQGbYXnkUJUu4kJAKZy", "question": "What type of skiing is this person performing?", "choices": ["touring", "alpine", "downhill", "slalom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472674.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 304544, "question_id": "chdphp5Np7BD6bbaTWbgdP", "question": "What are the women doing with the food?", "choices": ["cleaning it", "selling it", "eating it", "cooking it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000304544.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378217, "question_id": "citEpyLL2ENN9yP7mMH2P3", "question": "Who was this backpack company named after?", "choices": ["janis joplin", "janis paige", "janis lewis", "janis ian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378217.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142475, "question_id": "cjySphjFXDNi56soLoRuxa", "question": "What is immediately surrounding the person?", "choices": ["water", "rocks", "fish", "air"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121988, "question_id": "cmwivNESRBzDvpoEj6rdkH", "question": "What can clearly be said about the elephant just prior to this activity?", "choices": ["hungry", "in danger", "thirsty", "bored"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121988.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403603, "question_id": "cnnSpE8YL3uBhMpQaHfXgJ", "question": "What is out of the ordinary on the dog?", "choices": ["bowtie", "eye color", "nail length", "nose size"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403603.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180420, "question_id": "cpo98XKSadGts4C22Hyow3", "question": "What is the object around his leg called?", "choices": ["grip", "string", "surfboard leash", "strap"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180420.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534721, "question_id": "cq3saAqynj3aJLXnqPxRZr", "question": "What celebrity died at the age that the first two numbers on the front of the bus before the 550 represent?", "choices": ["james dean", "cameron boyce", "alec guinness", "jimi hendrix"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534721.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504963, "question_id": "cqyexmuukNfzfZRN2JLx8B", "question": "Which of these photography aspects affects this image the most?", "choices": ["color", "lightning", "focus", "motion"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504963.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264641, "question_id": "crf2b3sWvZ4wYNzesYUMYA", "question": "This cat's owner shops at which clothing store?", "choices": ["macys", "old navy", "k mart", "sears"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000264641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 469477, "question_id": "crvbmXSTVDnSESY6rx9QoU", "question": "What style bathing is seemingly unavailable here?", "choices": ["sink", "shower", "cold shower", "bathtub"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000469477.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 429626, "question_id": "cs2FjAYfc9zNJnpfGqBoSX", "question": "Why is there a plastic object in the pizza?", "choices": ["cutting", "warming", "serving", "balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000429626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302915, "question_id": "csKC7xtHRgSoGcK8of65tG", "question": "How many people are watching the surfing in body of water?", "choices": ["one", "waves", "several", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302915.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202555, "question_id": "csKj2UKv8iQTHzLhKVo9KA", "question": "What vegetable is held inside the small packet with an M on the label?", "choices": ["tomato", "lettuce", "potato", "carrot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202555.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 294808, "question_id": "cu2sd3N79EVY2N8HhModA7", "question": "What natural feature does this young person face?", "choices": ["ocean", "volcano", "rock", "inland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000294808.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417939, "question_id": "cvD4YVRX8x9V6isAS8KkUf", "question": "What small crime is evidenced?", "choices": ["speeding", "noise pollution", "littering", "jaywalking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417939.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371312, "question_id": "cxAyycdSzjjetLrVvo9Arh", "question": "What floor of the building are they most likely on?", "choices": ["first", "third", "fourth", "second"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371312.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139570, "question_id": "cxketK757P3rmUb8LZMNY9", "question": "The item the animal is standing on looks most like what?", "choices": ["snowball", "nest", "couch", "chair"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000139570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 88435, "question_id": "cyVxNpAqBFBM4dBsNvPELZ", "question": "What is the weather where the man is?", "choices": ["raining", "cloudy", "clear", "snowing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000088435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 132201, "question_id": "d3JmcJHZP4JEsJmoqmoD7n", "question": "What type fencing is visible here?", "choices": ["wooden", "picket", "barbed wire", "wrought iron"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000132201.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133267, "question_id": "d4tnPQXoMBpLhcBzr4hZSF", "question": "This plane likely brings packages to what kind of area?", "choices": ["urban", "space station", "suburban", "rural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000133267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355201, "question_id": "d7ECViC5zTiiGwoghwF4k5", "question": "Why is there an area with no plants?", "choices": ["river", "gophers", "not watered", "walking path"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355201.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519585, "question_id": "d7yTQ6RGYyWtQSGCriw9gi", "question": "What lifts the person seen here off the surface?", "choices": ["whale", "helicopter", "wave", "wind sail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519585.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 428723, "question_id": "dD6vDVwWzuFyaXmS2thGrm", "question": "How many windows on the vehicle are directly facing the camera?", "choices": ["two", "three", "four", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000428723.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49806, "question_id": "dDvzeWvEqySgfXkzkzm5T7", "question": "What demographic is targeted in the advertisement on the bus?", "choices": ["young adult", "middle-age", "children", "seniors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551968, "question_id": "dFQtepHFpt8ZynmVue4AEe", "question": "The person likely wants what to this address?", "choices": ["space ships", "flights", "directions", "carpets"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000551968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410897, "question_id": "dFqdSywgTaR8m4sx3o7vZW", "question": "What does the sign indicate to drivers approaching it?", "choices": ["yield", "turn only", "stop", "drive faster"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410897.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99879, "question_id": "dGYG5nQrphVMKBGKBSct6J", "question": "What activity are the animals engaged in?", "choices": ["chewing", "drinking", "defecating", "spitting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099879.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463205, "question_id": "dKrf7n3wbtxwRDa42LTEvZ", "question": "Where does the man have padding?", "choices": ["head", "back", "elbows", "knees"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463205.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20331, "question_id": "dL3tfm6UgyRLQkzPV6iyAH", "question": "What looks most likely to be being carried at the moment by the train?", "choices": ["luggage", "paper", "toys", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020331.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470835, "question_id": "dLReJG6TxJZSD8jKDy9Ezg", "question": "What type of food is grown on these trees?", "choices": ["vegetables", "fruits", "nuts", "beans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 54356, "question_id": "dMYoMjBScaXEPqP6NQGM3t", "question": "What word is used when playing this sport?", "choices": ["home run", "touchdown", "goal", "serve"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000054356.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136615, "question_id": "dMgD9BUFYsAk2CsTQibRfS", "question": "What was this broken appliance used for?", "choices": ["cooking", "music", "security", "cleaning"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136615.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170282, "question_id": "dVRMNW3aRWAeQipbKKfyxu", "question": "What type of traffic is permitted on this street?", "choices": ["one way", "all way", "two way", "no way"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170282.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59621, "question_id": "dWqCaE8sk2AbpfHRtRk2Wb", "question": "How much training would it take to be able to coordinate the airplanes into this formation?", "choices": ["weeks", "days", "months", "years"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000059621.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 495862, "question_id": "dY8yPjKJEA3rHzUmvftWxT", "question": "What fallen object is the bear investigating?", "choices": ["wall", "house", "shed", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000495862.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146653, "question_id": "dYo7J9GWm8XgkpgVkLmn3J", "question": "How is the equipment on the animal's mouth called?", "choices": ["briddle", "horse rope", "leash", "flaps"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146653.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188319, "question_id": "dZXVAdFPV2C2xsCDDrNsW4", "question": "What information does this object show?", "choices": ["time", "longitude", "temperature", "latitude"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188319.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315804, "question_id": "daztf2jaUgyaiPrZNtNEeG", "question": "What is missing from the skateboarder's attire?", "choices": ["shoes", "safety gear", "shirt", "cap"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315804.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157479, "question_id": "dbdxTmqqu4DA4XRRdZJgE6", "question": "The words at the front of the plane refer to the company's what?", "choices": ["warning", "website", "motto", "history"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 471341, "question_id": "dbiSeZ2bRDiWHBNCZ7KEvq", "question": "What is furthest from the counter?", "choices": ["box", "folded fabric", "menu", "pizza"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000471341.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 9970, "question_id": "dc6ib46PTuuxZEBqppqu9s", "question": "What is the giraffe next to?", "choices": ["truck", "tree", "another giraffe", "building"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000009970.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 525771, "question_id": "dcDqXo9sSf6nP7zZdVYWG2", "question": "Why is this person extending his arm?", "choices": ["balancing", "posing", "stretching", "signaling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000525771.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102565, "question_id": "ddKDWb6sSsdKeAvNN67bX8", "question": "What type clothing should persons exiting this plane wear?", "choices": ["bikinis", "light", "none", "winter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507184, "question_id": "ddXQNBj9pYzN5njCAabGUZ", "question": "What type of lighting is used?", "choices": ["pendant", "lamp", "chandelier", "pot lighting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81007, "question_id": "ddZs7mARuHxvizZex2s8JF", "question": "Which object is the biggest fire hazard?", "choices": ["washing machine", "tv", "toaster", "microwave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081007.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 344639, "question_id": "defTLmJdWmiyiFhcdowguZ", "question": "What type of book is underneath the flowers?", "choices": ["nonfiction", "coloring", "fiction", "poetry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000344639.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 206479, "question_id": "dezq53naeEYH5hmcpe2REJ", "question": "What are these animals ready to do?", "choices": ["drink", "eat", "sleep", "attack"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000206479.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 144825, "question_id": "dgeLeNgifo9h2T85pxqWko", "question": "Why are the ladies eyes wide opened?", "choices": ["is surprised", "is scared", "is tired", "is mad"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000144825.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102535, "question_id": "djSSUcf64ir36sYu8jefPR", "question": "What part of the animal is touching water?", "choices": ["feet", "eye", "wing", "beak"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102535.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195552, "question_id": "dkGMtmiVKrDZ6aL293WeH8", "question": "If he were to make noise what would you hear?", "choices": ["woof", "quack", "meow", "baa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195552.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229115, "question_id": "dm7SNwmRCVoVsFvLaVYfM2", "question": "What species is seen here?", "choices": ["porcine", "human", "ovine", "ursine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229115.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126544, "question_id": "dmc7Y7eXcDXTcVU7BHSWco", "question": "What discourages cows from brushing against the fence and escaping?", "choices": ["bulls", "barbs", "people", "signage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000126544.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 270660, "question_id": "dnwy98Sp2uswjQkXt7PGGE", "question": "If someone was deaf and needed additional information while viewing TV what color should they select here?", "choices": ["red", "green", "blue", "yellow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000270660.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 472655, "question_id": "dqoLVpH69DC5mrRp2XPnSD", "question": "Which cat is most likely to touch object they're looking at if they all jumped?", "choices": ["equal chance", "cinnamon cat", "middle cat", "right cat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000472655.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 94474, "question_id": "droLkQHNnE7PWYuFdgTmLk", "question": "What is the sign shaped like an octagon most likely telling drivers to do?", "choices": ["turn", "park", "exit", "stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000094474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146351, "question_id": "drrozWVVQoGdjuTizdd8Da", "question": "Which of the animals in the picture have their own zodiac sign?", "choices": ["jaguar", "goat", "bear", "mermaid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146351.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565122, "question_id": "dtdEmUEhwMidFFtWz759h8", "question": "What is the most likely reason for smoke coming from buildings in the background?", "choices": ["kitchens", "smokers", "building fire", "they're factories"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000565122.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40465, "question_id": "dvoE2udKeji5JeZcbwwe8n", "question": "The yellow graphic is an example of what language tool?", "choices": ["ideogram", "poetry", "simile", "metaphor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040465.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191087, "question_id": "dxCqqVnY9A9xBoCCvViiS7", "question": "Why would someone sit at this table?", "choices": ["to paint", "to eat", "to work", "to sew"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365181, "question_id": "dxiJnoUHiFze8PJ3M9xCnx", "question": "What was the red wall of the archway made from?", "choices": ["glass", "sand", "aluminum", "bricks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 458589, "question_id": "dyLJC2Lt7o6JDhbi32cnMC", "question": "What are the folded items meant to be used for?", "choices": ["working", "sleeping", "showering", "jogging"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000458589.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 288774, "question_id": "e2maN35ByBQ6HFTeqPZh9t", "question": "What is another surface that this game is commonly played on?", "choices": ["grass", "sand", "concrete", "tarmac"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000288774.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473530, "question_id": "e4d2GtUVjRcTC42nKajf8e", "question": "The discs on the side of the back of this vehicle resemble what musical medium?", "choices": ["8 tracks", "cds", "vcr tapes", "cassettes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336413, "question_id": "e5zbxFnanEDLNQHox9gmDh", "question": "What animal is this animal traditionally depicted as an enemy of?", "choices": ["mice", "tigers", "bats", "sharks"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336413.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118704, "question_id": "e7B7FrcRZxpKUV9xWRcnrr", "question": "Where will the animals quench their thirst?", "choices": ["watering hole", "bowl", "trees", "trough"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118704.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 296726, "question_id": "e8dXFyGFSRqbosyjafanK6", "question": "What do the wheels on the bottom of the plane help with?", "choices": ["landing", "noise", "wind resistance", "dynamics"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000296726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103392, "question_id": "eD2FUm7gbpQujg836fERoV", "question": "Which item on the plate contains the most vitamins?", "choices": ["meat", "turkey skin", "potatoes", "greens"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353717, "question_id": "eJWw5S4LBpYELj627TRc4S", "question": "The logo for this beverage company was introduced during what war?", "choices": ["1812", "wwi", "wwii", "civil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353717.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393713, "question_id": "eJXqgvJz9u9sFnLGSCHUcd", "question": "How long can elephant tusks get?", "choices": ["11 feet", "8 feet", "7 feet", "9 feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000393713.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168396, "question_id": "eKKNxeraryVyX28P5gykAV", "question": "Where are they going?", "choices": ["into water", "grocery store", "home", "school"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451545, "question_id": "eKkDrHEe7C5iopSvyzpYdb", "question": "Why sanitary wares are white in color?", "choices": ["material color", "represent cleanliness", "stain", "cleaning"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000451545.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38182, "question_id": "eLRb9PDSjMVAmyW6zTqiWj", "question": "Why is the bear bleeding?", "choices": ["fight injury", "got cut", "surgery", "was shot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038182.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74061, "question_id": "eMDphymE5GzUyAfz72NFfj", "question": "Which direction is the white arrow on the sign pointing?", "choices": ["east", "north", "south", "west"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074061.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232152, "question_id": "eMTcxiizZPBysohxoDQR3M", "question": "How is the man holding the board feeling?", "choices": ["happy", "scared", "annoyed", "angry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232152.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473303, "question_id": "eNbuhyHFzV6eRGf6rWvPRS", "question": "Why would someone sit at this table?", "choices": ["to eat", "to saw", "to sew", "to paint"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264745, "question_id": "eNpcQ4qPFjbadm8UQBM2Ht", "question": "What is the only type of vehicle that can turn left at this traffic light?", "choices": ["buses", "trucks", "motorcycles", "cars"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000264745.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40739, "question_id": "ePz25eii6PiqBztonAXpFy", "question": "What number is the big hand of the clock pointed to?", "choices": ["seven", "six", "eight", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040739.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 88194, "question_id": "eQACtvma279wzH4mJedaGZ", "question": "What plugs into this device for charging?", "choices": ["mini usb", "hdmi", "printer port", "coax cable"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000088194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10575, "question_id": "eQjsxeQwfsB6Xzkr2UwUZS", "question": "What are the wheel objects on the plane?", "choices": ["tricycles", "cars", "spare tires", "landing gear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000010575.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 447916, "question_id": "eRRMET4ZcVKpkUxoZVsKUH", "question": "What food is this animal a vital ingredient in?", "choices": ["soy burger", "lamb chops", "rabbit stew", "beef burger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000447916.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155769, "question_id": "eRdKDbL8J4hPnBgYcPLu8h", "question": "The paper underneath the scissors usually comes in what form?", "choices": ["sheets", "bundle", "scraps", "roll"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155769.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574768, "question_id": "eUYVVbjbeoiWCmCswnJ3i8", "question": "If one more bird landed on the structure how many would there be?", "choices": ["three", "one", "six", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000574768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202208, "question_id": "eUzXhZF6Gwh8ETskHzaGMy", "question": "What caused the color on the second hand?", "choices": ["paint", "acid", "rust", "oil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102258, "question_id": "eVccuhHqBoTha7nwHsDn7Y", "question": "What type of road is the elephant crossing?", "choices": ["railroad", "dirt", "side street", "highway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102258.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249797, "question_id": "eVimhrDo4GEP4mXZBygsbv", "question": "The single folded towel on the bed indicates this room is where?", "choices": ["hospital", "hostel", "dorm", "hotel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000249797.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486279, "question_id": "eWA2mGVHbqabfYEMw3wmbu", "question": "What kind of scissors are those?", "choices": ["craft scissors", "fabric scissors", "hair scissors", "kitchen scissors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486279.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465731, "question_id": "eWHoFfGDqZqEDwJrVYdQVQ", "question": "What mode of transport is displayed in the image above?", "choices": ["air", "road", "railway", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465731.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66393, "question_id": "eXf3zcsBt85xBr6WRbLXy6", "question": "What would he do if he is really happy?", "choices": ["wag tail", "bite", "bark", "whimper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066393.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 580380, "question_id": "eYCtC9drpiuRVdEKEsWpge", "question": "What is the bear holding in it's mouth?", "choices": ["fish", "cup", "meat", "bowl"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000580380.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 14084, "question_id": "eZuuiLGqLUKE77SQ8cgKxi", "question": "What might this animal do first if it senses danger?", "choices": ["bite", "nest", "fly", "peck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000014084.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191087, "question_id": "ebjML7D3QxY8SBo8rKVQvD", "question": "What is the Chinese version of this green vegetable called?", "choices": ["kun choy", "choy sum", "gai lan", "bok choy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123000, "question_id": "edVzSjj5tLHLyyYc2BWZFf", "question": "Why are the flowers and leaves on the floor?", "choices": ["designed to", "plant dying", "dog", "windy day"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123000.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256146, "question_id": "edcBVTdpFgs8JbP7YrE79s", "question": "What type of house is the one pictured above?", "choices": ["mud", "modern", "temporary", "hut"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000256146.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 580982, "question_id": "edwRNghfRWX98avaHcDGwJ", "question": "What type of person uses the toothbrushes present in the two holders?", "choices": ["child", "teenager", "adult", "baby"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000580982.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317197, "question_id": "eeXpwb6xNB9ooxTViH4WFx", "question": "What is the item on in the back window used as defence against?", "choices": ["sun", "snow", "rain", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317197.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179596, "question_id": "egEeCFoZsaMAA2vwpTdiwD", "question": "What type of furniture is this equipment located on?", "choices": ["wicker", "wood", "plastic", "upholstered"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179596.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439830, "question_id": "egKEmkbeYXzj54sLiFJ6iR", "question": "How much too late is the person for gaining entry to this business?", "choices": ["1 hour", "2 minutes", "none", "30 min"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000439830.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349086, "question_id": "egpoDbHQqpdtEeoAKkTvAF", "question": "What does this device work with?", "choices": ["computer", "refrigerator", "motorcycle", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349086.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 265765, "question_id": "ehAye27aMw8FWUSESf4Cc7", "question": "Where is the motorcycle currently moving?", "choices": ["north", "east", "nowhere", "west"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000265765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 21722, "question_id": "ehMCQ6eFFRdtE5XrtZC2do", "question": "What is another name for one of the individual objects being stacked up?", "choices": ["garage", "luggage", "storage", "tower"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000021722.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356041, "question_id": "eiBY2MkpcrvkBuYT4zmnPi", "question": "What is the yellow item worn over the shoulders of the skier?", "choices": ["sweater", "suspenders", "racing bib", "jacket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356041.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446323, "question_id": "ejkkcvFM6fpfwrwfNH2NEs", "question": "Why is he standing like that?", "choices": ["falling", "slipping", "showing off", "maintain balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446323.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581207, "question_id": "emp4iNhFhYTNgRYFy3CipM", "question": "How much was Danielle paid to paint this wall?", "choices": ["1 million", "120", "nothing", "100"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581207.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369216, "question_id": "enNXmTYYzWMwjcbUWYWbc7", "question": "In what room is this woman located?", "choices": ["office", "kitchen", "bedroom", "bathroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369216.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570583, "question_id": "enuS2E36M5ALPW9n2cuPus", "question": "What is the likely gender of the person who snapped this photo?", "choices": ["male", "non-binary", "can't tell", "female"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000570583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 368902, "question_id": "eo3aLQT9zdHTtXZmLSLjyN", "question": "What kind of outfit is the person wearing?", "choices": ["casual", "costume", "uniform", "business"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000368902.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546051, "question_id": "epNicjaNwYyDGY82tjhw6g", "question": "What shape do this animal's markings most resemble?", "choices": ["circle", "rhombus", "hexagon", "square"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546051.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298244, "question_id": "eq4N5tguEbVkKxJahgjKJN", "question": "What is above the toilet?", "choices": ["robe", "shelf", "art", "cabinet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298244.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11394, "question_id": "eqHXsaJbHLU6nTyuEE5pGy", "question": "How will he get down after climbing up?", "choices": ["lift", "snowmobile", "jump", "ski"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000011394.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156165, "question_id": "eqsVwq23MEZWkHkLkLy6TT", "question": "What type of bathtub is pictured?", "choices": ["freestanding", "back-to-wall", "built-in", "corner"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000156165.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 279576, "question_id": "erVzaqSr6QuaAX3VkPHEv3", "question": "From where does the whitish substance seen here emanate?", "choices": ["people paint", "birds", "donkeys", "goats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000279576.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 60067, "question_id": "etXMoNs7BSn9RUDAu2h4ic", "question": "Which year's Olympics was this?", "choices": ["2004", "2000", "2016", "2012"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000060067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136607, "question_id": "etiJDwKjeHbEiUPbiHCeEo", "question": "What might the giraffes here consume first?", "choices": ["grass", "low shrubbery", "rock", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136607.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168907, "question_id": "etryjmiRJkRP2FASBqaskR", "question": "A lack of what would cause the toilet water to be colored?", "choices": ["toilet paper", "water", "flushing", "plunging"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000168907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227707, "question_id": "evRWJNn2yraqzfrdkcDBaf", "question": "The top vehicle is designed to travel where?", "choices": ["underwater", "africa", "mars", "space"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000227707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333914, "question_id": "exkB3BtytKZRECLvSKXizo", "question": "What expression is the person doing?", "choices": ["sulking", "sneezing", "crying", "laughing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 553099, "question_id": "eyLabFNDu2f5t7XkgWfeJH", "question": "How can the proportions of these legs be described?", "choices": ["short", "stocky", "humanesque", "tall"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000553099.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 137298, "question_id": "ezzTFbhYw4pwhaPM9DXtjx", "question": "How many zebras are drinking water in this image?", "choices": ["one", "three", "eight", "two"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000137298.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512804, "question_id": "f2HbbyTRBzNz7QVAf2a9GY", "question": "What is the middle initial of this company's founder?", "choices": ["d", "w", "l", "s"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000512804.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178243, "question_id": "f2nhvuHD8KRHweHSbkLUdR", "question": "What is the child doing?", "choices": ["hiding", "cleaning table", "chewing table", "stealing laptop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35838, "question_id": "f3ZNZocWMix9ukZoauTbdD", "question": "Where could these signs be?", "choices": ["portugal", "united states", "mexico", "england"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000035838.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385287, "question_id": "f3jBAawP3xDdSKeXZPhJ39", "question": "How many destinations does this airline visit?", "choices": ["25", "50", "100", "80"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000385287.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 545218, "question_id": "f46XbukVdjvDhNuTaQWfcn", "question": "What type of table is the bowl sitting on top of?", "choices": ["wooden", "wicker", "picnic", "glass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000545218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 384614, "question_id": "f4k65U3AShprWZbuJkPAMH", "question": "What is the plane having to navigate to land safely?", "choices": ["people", "buildings", "trees", "animals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000384614.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 75819, "question_id": "f564wxchTemdYLojVvXaT8", "question": "What is the swarm of sea birds looking for in this location?", "choices": ["lobsters", "sharks", "small fish", "large fish"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000075819.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 572624, "question_id": "f6iobUCGfxfGfvzV8KhZt8", "question": "What grain is used to bake this food?", "choices": ["wheat", "pumpernickel", "rye", "corn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000572624.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 134931, "question_id": "f6mVPkFQ9HMsbGeXr84gjC", "question": "What job does the person in the round tower hold?", "choices": ["none", "police", "bird watcher", "factory worker"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000134931.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57687, "question_id": "f7xLAVTiWRK3rGCRGTFkN8", "question": "Why is the elephant two different colors?", "choices": ["painted", "striped", "injured", "wet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000057687.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420095, "question_id": "f9yGJgkjbH3cQApKkXHnHS", "question": "What is preventing the bird from flying indoors?", "choices": ["injury", "window", "owner", "fencing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420095.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419118, "question_id": "fEagEi8ugLW9aKzQ2TQYCy", "question": "What must they pay if they want to ride?", "choices": ["donation", "fare", "toll", "fine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000419118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 406262, "question_id": "fFFFPhTawkrk86rwg6cd9z", "question": "Where will this train likely go today?", "choices": ["no where", "dallas", "chicago", "galveston"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000406262.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468327, "question_id": "fFRSGUa76Y7LUDynDPef9H", "question": "What mountain is this near?", "choices": ["glittertind", "ailigas", "brenibba", "sentraltind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000468327.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38401, "question_id": "fFf9AmYCavcntrGtVSkz93", "question": "What items are below the plane?", "choices": ["signs", "guns", "cows", "antelopes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038401.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 77210, "question_id": "fFhvi7Nn5WwF5Uku4A7DQg", "question": "The item the dog is sleeping on provides what?", "choices": ["exercise", "warmth", "shade", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000077210.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103345, "question_id": "fG5DTsKRKDyWQLjPuiZ6E2", "question": "What main item does he need to put on to be dressed?", "choices": ["hat", "pants", "cufflinks", "watch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103345.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229579, "question_id": "fGN3Am32n6hGbiMmdUcYua", "question": "Which fruit is the least juicy?", "choices": ["green", "brown", "yellow", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536197, "question_id": "fJQjCCYBrNHMuw9mjDHXjd", "question": "The man's hairstyle is most similar to what?", "choices": ["bouffant", "crew cut", "beehive", "man bun"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536197.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 98343, "question_id": "fJbzbdtg4VpzHKwyLxRTz7", "question": "What are the green signs above the stop sign showing?", "choices": ["delays", "streets", "phone numbers", "traffic warnings"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000098343.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 571705, "question_id": "fL3DsTCyw2pMMgQdGkKqrY", "question": "The waste can is made from what material?", "choices": ["metal", "wood", "marble", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000571705.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 46036, "question_id": "fMfqMndnngMVsiDXmLvb3F", "question": "What type of transportation is shown?", "choices": ["air", "road", "rail", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000046036.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49142, "question_id": "fRMD8Kv5uGDsz3hmkuzX5x", "question": "Who is hanging out in the background?", "choices": ["moose", "hippopotamus", "ostrich", "gazelle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049142.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215294, "question_id": "fSqbA8cDoVfHkqvqvPyezk", "question": "What are the white lines in the sky commonly called?", "choices": ["bumps", "contrails", "blips", "letters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000215294.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488579, "question_id": "fVx5WJqMYLSofACExDEpCo", "question": "What can this software be used to do?", "choices": ["edit photos", "edit videos", "edit music", "word process"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35261, "question_id": "fW9DxAUrupScELHNFAhtiU", "question": "Which animal is the giraffe most likely to notice in its current position?", "choices": ["sheep", "duck", "zebra", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232162, "question_id": "fWGEt42KBinGmmHC77X2gK", "question": "What is shooting up in the sky?", "choices": ["rifle", "firecracker", "paratrooper", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232162.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 71950, "question_id": "fWPfEXssYii7wLViMAEd2i", "question": "Where would these scissors most likely be found around a house?", "choices": ["garage", "bathroom", "living room", "dining room"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000071950.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297062, "question_id": "fXYd9TvEKZAvypRdc9HzHy", "question": "Why is he looking down?", "choices": ["dropped sandwich", "tired", "avoiding camera", "bad news"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000297062.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 173151, "question_id": "fZmwr5PJ8R975oSmEbHKbX", "question": "What action are the red-handled scissors specialized for?", "choices": ["slicing", "cutting", "edging", "trimming"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000173151.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81349, "question_id": "faQNxgKRaRyjUKCwnsY6Nh", "question": "What is the same color as the sign?", "choices": ["clown nose", "bee", "candy cane", "iguana"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000081349.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 413002, "question_id": "fbkfZQZNwjrJVHEsCHy4q9", "question": "How many of the stuffed animals are pink?", "choices": ["two", "one", "three", "four"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000413002.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 410564, "question_id": "fcJ5sMcyaBBeGs5wjjbxgP", "question": "What is the red office supply beside the cat?", "choices": ["calculator", "ruler", "pen", "stapler"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000410564.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 393792, "question_id": "fdsoDVRLnDv2oxguPx2RtV", "question": "What style bathing is available here?", "choices": ["none", "walk in", "shower", "spa"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000393792.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155065, "question_id": "fes8pEnydVBWfVaporve54", "question": "Where are these animals located?", "choices": ["office", "home", "vet", "barn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47146, "question_id": "fezTqZBpcQursAUAZ2uBmP", "question": "What type of coat coloration does the cat on the top shelf have?", "choices": ["calico", "tabby", "tuxedo", "tortoiseshell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000047146.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355590, "question_id": "fg58Soyz6bY4nP8GxknzVc", "question": "What does the child have on their head?", "choices": ["hood", "helmet", "crown", "beret"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 530319, "question_id": "fgb2HkvhviaFWhkVifxN4k", "question": "What weather phenomenon is the dog playing in?", "choices": ["hail", "hurricane", "snow", "rain showers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000530319.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 375297, "question_id": "fgfCrXtc7mZmafyrxPVMGM", "question": "What color bedspread does the owner of this dog sleep beneath?", "choices": ["floral print", "plaid", "solid", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000375297.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 16924, "question_id": "fhS4HMsgizy9ZQSWgMUwwT", "question": "What activity is the zebra performing?", "choices": ["hunting", "sneaking", "drinking", "grazing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000016924.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365932, "question_id": "fheMogezapWdz2ee6beGYE", "question": "What is the pulley system to the left called?", "choices": ["dumb waiter", "ski lift", "gondola", "rope tow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577218, "question_id": "fijgfa8pQ36SaTwAQKg4uh", "question": "What is a well known city that is located in the country from which this plane originates?", "choices": ["helsinki", "brooklyn", "antwerp", "cairo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511178, "question_id": "fkjTu9tB67F95woimXLeXt", "question": "What color allows this animal to blend in here?", "choices": ["green", "white", "brown", "blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511178.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188506, "question_id": "fmud2PMbAKXHZfejFaJS3w", "question": "What room does the dog seem to be sitting in?", "choices": ["kitchen", "bedroom", "hallway", "bathroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188506.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356040, "question_id": "fmyZqAo3hH3E8QgXzLAMmY", "question": "What is one way to describe the overall arrangement in the center of this room?", "choices": ["symmetrical", "sparse", "crowded", "chaotic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356040.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551932, "question_id": "foBYqidoKpxVcWhNQqNx2m", "question": "Who likes to eat the fruit that is pictured here?", "choices": ["snoopy", "donkey kong", "crash bandicoot", "garfield"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551932.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528593, "question_id": "fpoeUkd4dE8htHU65769tZ", "question": "Where is she brushing her teeth?", "choices": ["bedroom", "kitchen", "garage", "bathroom"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528593.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 252456, "question_id": "fst5tpS4qcUS3kE4P9vqmB", "question": "What secures the yellow ball to this place?", "choices": ["nothing", "sand bar", "driftwood", "anchor"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000252456.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196303, "question_id": "fxEzC6EwxCab8FH3id58Yj", "question": "What are the bears having?", "choices": ["grass", "venison", "tea party", "prey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000196303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 257607, "question_id": "fxNGQiuH4WLnAb66ag7Z4K", "question": "Why is he holding the board?", "choices": ["is broken", "stole it", "is resting", "going home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000257607.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 579135, "question_id": "fyHvQFLPeqNBdNFTDeE6yd", "question": "What is the lightest color of flower in the vase?", "choices": ["white", "pink", "purple", "black"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000579135.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 17552, "question_id": "fyJt9F7ADz7uEST96EQtFG", "question": "What can they put up for more privacy?", "choices": ["fence", "screen", "curtain", "door"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000017552.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 213335, "question_id": "g3vHzK9JEhs42RuYHgBi56", "question": "What course is being served?", "choices": ["dessert", "salad", "appetizers", "entree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000213335.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198194, "question_id": "g4MUhL86TYJYusi9JcHHsS", "question": "What entrepreneur founded this company?", "choices": ["vernon rudolph", "ralph dupree", "abe winsted", "scott ford"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198194.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 3111, "question_id": "g5q4RLtTRRRA4XygB5E9Gy", "question": "What would cause the kite surfer to go faster?", "choices": ["boat wake", "paddle", "wind gust", "big wave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000003111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 208977, "question_id": "g6Ht63HwXBgmEjteVUwLj6", "question": "What kind of surface is the person skiing down?", "choices": ["dune", "cliff", "slope", "road"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000208977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 327541, "question_id": "g8cPmAR2mvUReJXPWi7KzJ", "question": "Why might someone want to avoid using the knife?", "choices": ["too big", "blunt", "dirty", "too small"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000327541.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 444561, "question_id": "g8ncrRZpb4cnPiqq9xTxve", "question": "What is needed for this activity?", "choices": ["wind", "ice", "water", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000444561.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 203979, "question_id": "g8vzQamL7ko8HhRoFBRyUJ", "question": "What is he doing?", "choices": ["cleaning board", "getting lunch", "wandering around", "seeking waves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000203979.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195659, "question_id": "g9TZasdsgXnMjnXSJBmWw4", "question": "What does someone want continuity on here?", "choices": ["food", "settings", "text", "nothing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000195659.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533035, "question_id": "gAGtPREe2oAoqqCSYszAMc", "question": "Why is the bird's shadow behind it?", "choices": ["is hungry", "can't see", "facing sun", "is hiding"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533035.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577836, "question_id": "gAGtT69U69deadaPPF98QG", "question": "What does this vehicle use for propulsion?", "choices": ["engines", "propeller", "wind", "solar power"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577836.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349749, "question_id": "gAJra5SYs4idFLwwGBcjqN", "question": "What is the yellow stuff on the plate?", "choices": ["banana", "lemon", "cheese", "pepper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349749.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102789, "question_id": "gBCmDmKn3TBv2ikg4Muckr", "question": "Which food in the bowl is found in the ocean?", "choices": ["shrimp", "green bean", "carrot", "asparagus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102789.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477551, "question_id": "gC2BajZ8XDY9ZfAentgkEq", "question": "What basketball teams jersey is the man wearing?", "choices": ["clippers", "lakers", "bucks", "warriors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 216654, "question_id": "gCWMJTkYHgV38qm52uRUd9", "question": "In what country is this building found?", "choices": ["italy", "britain", "china", "france"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000216654.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26649, "question_id": "gEqqdSxRp2nEje9jKvq6DK", "question": "Where is this item usually sold?", "choices": ["mcdonald's", "pizzeria", "wendy's", "steak house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 180512, "question_id": "gF96zoYAvDi44WcqKPDfWM", "question": "What is the cat sitting next to?", "choices": ["potted plant", "oven", "tree", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000180512.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136362, "question_id": "gHgM5DMcFuSws6H5XaM8AR", "question": "Which roll of toilet paper was replaced most recently?", "choices": ["bottom", "top", "right", "left"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000136362.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 560400, "question_id": "gKcZDTUPTfPrekn2oNvcyD", "question": "What is the purpose of the openings near the rear of the bus?", "choices": ["board", "ventilation", "visibility", "storage"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000560400.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 191401, "question_id": "gLHJfJS5W3xQ4F8XiZ7D97", "question": "While waiting for this conveyance to arrive what type of seating might you utilize?", "choices": ["stool", "sofa", "couch", "bench"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000191401.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 436517, "question_id": "gMBWGAMH4s8Ck3DcHTq7sE", "question": "What is the red object used for?", "choices": ["fertilization", "sprinkler", "fire hose", "traffic control"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000436517.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 359742, "question_id": "gPRykccCiRBiKUHuoYSdki", "question": "What breed of dog is the toy modelled after?", "choices": ["pitbull", "beagle", "golden retriever", "pug"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000359742.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 408741, "question_id": "gPi3WDuTixQs8VTvfDEea4", "question": "What is beneath the person's feet?", "choices": ["sand", "their opponent", "mud", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000408741.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 46939, "question_id": "gQFT9AW22nfMMZgPviiGrs", "question": "What move is the boy attempting?", "choices": ["nollie", "grind", "ollie", "kickflip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000046939.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56959, "question_id": "gQpDgAAUwRE2CCCbAXxFRw", "question": "What would happen to the bear if the tree behind it ceased to exist?", "choices": ["go hungry", "fall sleep", "begin flying", "shift backwards"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056959.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 62811, "question_id": "gRerLukRroKaGrdYE4NGWa", "question": "Which object would deter the giraffes the most?", "choices": ["pole", "fence", "grass", "tree"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000062811.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269765, "question_id": "gTcjWdR8BCPhRPe2eEgwx9", "question": "What type of device is shown?", "choices": ["cell phone", "laptop", "kindle", "tablet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 251068, "question_id": "gTfimghYppYaRKZoLZpH4K", "question": "What vegetable is the reddish color on this sandwich originally derived most from?", "choices": ["cabbage", "beets", "eggplant", "tomato"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000251068.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 578211, "question_id": "gUGfqMh4CAJAwMrL9sxGdV", "question": "What should the clock be replaced with to make it more realistic?", "choices": ["shovel", "bag", "box", "head"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000578211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 352795, "question_id": "gUwrWdEfV3QHHtNg3QrABp", "question": "What sort of event is this clothing item appropriate for?", "choices": ["informal", "casual", "beachwear", "formal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000352795.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 564937, "question_id": "gWFooDpNFfFvZKGiJPEWSz", "question": "What time of shirt is the man wearing?", "choices": ["t-shirt", "plaid", "tuxedo", "v-neck"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000564937.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 493116, "question_id": "gWSPeqkvHU7GyahbkhDRhi", "question": "What is separating the two surfers?", "choices": ["wave", "boat", "shark", "board"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000493116.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291254, "question_id": "gZgXigCxsuu2rwr5csDyC5", "question": "Why is the cat here?", "choices": ["can't move", "wants food", "person posed", "likes keyboard"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000291254.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 216423, "question_id": "gbTcMoPwJbNVMZcKpXTNd8", "question": "What sport has a hall of fame in the location listed on the side of this train?", "choices": ["tennis", "basketball", "racquet ball", "baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000216423.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192446, "question_id": "gcNAKM9MCKhtwPXH3yWekJ", "question": "Which of these businesses are known for selling the food in the man's hand?", "choices": ["wendy's", "dunkin donuts", "starbucks", "mcdonalds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192446.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 300914, "question_id": "gd8fM3mUkA7qxEZKFMaDDk", "question": "What is the name of this airline's frequent flier program?", "choices": ["flymiles", "skymiles", "airmiles", "cruisemiles"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000300914.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147191, "question_id": "gdBvYao4xELdnG4Bbvhvo8", "question": "What item is similar to what the boy is touching?", "choices": ["tennis ball", "treehouse", "baseball bat", "rocking horse"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000147191.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207277, "question_id": "geLjiuHsQ54Q888JpMkqhy", "question": "How is the skier changing direction?", "choices": ["snow", "skis", "poles", "boots"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207277.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56803, "question_id": "gex65d7T2Mj5VuY65JzGk6", "question": "What number should be written nearest to where the minute hand is?", "choices": ["four", "one", "five", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056803.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152972, "question_id": "gfQ9wLbHELAgisazFr3Adp", "question": "Images seen on kites here originated in which media?", "choices": ["dvds", "comic books", "bible", "stage plays"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152972.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 244346, "question_id": "ggKybz9h9cH7yaRYyng4ny", "question": "What does the tan and white object give?", "choices": ["light", "heat", "comfort", "information"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000244346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229298, "question_id": "ggXZ7mGFSmjxiZ9P8k35bJ", "question": "What kind of seating is available here?", "choices": ["ground", "bench", "cinema", "stool"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229298.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431806, "question_id": "gjrdE8jYoc2FBRMLHt89wm", "question": "Which type of chef's usually use this type of pot?", "choices": ["norwegian", "mexican", "bahamian", "chinese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200505, "question_id": "gkTbNAJUDbGkPmcadudmS6", "question": "What is essential for this sport?", "choices": ["ice", "grass", "sand", "snow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200505.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27328, "question_id": "goXLaTpAKnk7pessgtFdTf", "question": "What is the common color of the cat eye?", "choices": ["blue", "green", "golden yellow", "white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000027328.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 504281, "question_id": "gpzwVy4QmrRi2pYz9P6pnp", "question": "What are the birds doing?", "choices": ["mating", "flying", "feeding", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000504281.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290805, "question_id": "gqGoF3XHxL3zm7gV4ry5KE", "question": "What will most likely be added next?", "choices": ["twizzlers", "icing", "biscuits", "fudge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290805.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 505302, "question_id": "gqgSTtqs76BuhDiiCzuA9K", "question": "What kind of activity are the elephants engaged in?", "choices": ["mating", "sleeping", "grazing", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000505302.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 109684, "question_id": "gqy4q6uUiwvPTEBnibKLxR", "question": "What is the man wearing black shoes doing?", "choices": ["pooing", "flushing toilet", "washing hands", "peeing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000109684.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47196, "question_id": "grHdXkXq6eDjmi3WwrvyTW", "question": "What do these animals like to eat?", "choices": ["eggs", "grass", "pork", "bread"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000047196.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 534595, "question_id": "grJvLwhAMrdMx4z2DRGQWm", "question": "What hour will it soon be?", "choices": ["12", "eight", "nine", "11"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000534595.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30582, "question_id": "gs8tbGDZn449W2fVLB3uNk", "question": "What is the small gray circle on top of the mouse used for?", "choices": ["pasting", "typing", "scrolling", "licking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 443622, "question_id": "gtsGk4MwJhR3xgDuGQktDZ", "question": "What might the parasols here meant to be adorning when made?", "choices": ["frogs", "tea", "milkshakes", "cocktails"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000443622.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 266855, "question_id": "guc2cgMPYJMJhomqjuHQfw", "question": "What is the object the cat is playing with made out of?", "choices": ["soda bottle", "old lamp", "tin can", "mail package"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000266855.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386935, "question_id": "guwEGHTJW7M3Qi5mQxAzAa", "question": "What are the people using?", "choices": ["snowboards", "feathers", "boxes", "swords"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386935.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 535039, "question_id": "gw7vBCmPbWwFSSAneq5EUv", "question": "What is this athlete using to steady himself?", "choices": ["hand weights", "pole", "shoes", "ankle weights"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000535039.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372975, "question_id": "gwfoLVNxqC8n7ckhxwaU8E", "question": "What state are they driving through?", "choices": ["nebraska", "california", "new york", "minnesota"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372975.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 372354, "question_id": "gwsXjofCj7z59Noo6Xu52R", "question": "What is usually used with the vehicle here?", "choices": ["pogo stick", "helmet", "propeller", "seat belt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000372354.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414689, "question_id": "gxKuGChWRDEJgSAN98TCxV", "question": "This woman is communicating in which manner?", "choices": ["public", "private", "digital only", "none"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414689.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438770, "question_id": "gxwWy8sDhgZpmKcjKPJsZd", "question": "What is the area raised up off the water below the smoke called?", "choices": ["turnstile", "dock", "barrier", "pier"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438770.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581755, "question_id": "gyQf7j3RNGzff4CWho7g5H", "question": "What college name begins with the first three letters on the sign on the left?", "choices": ["vassar", "fordham", "adelphi", "binghamton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581755.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151128, "question_id": "gyddsj28k99QyB6baApHmN", "question": "What sound producer is located above the timepiece here?", "choices": ["trumpet", "buzzer", "kazoo", "bell"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151128.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229450, "question_id": "h2mBqDjrWgsWgYwLwZK6ax", "question": "What is the green item that is in abundance here?", "choices": ["peas", "cucumber", "lettuce", "squash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229450.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345021, "question_id": "h3E3369DHETQzJx8FoPtF3", "question": "Where is this game being played?", "choices": ["court", "field", "grass", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345021.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424758, "question_id": "h3d6D2rJ4sF5sks8mEsGSB", "question": "How far is it to Ifrane from here?", "choices": ["9 centimeters", "9 kilometers", "9 meters", "9 milimeters"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424758.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 388975, "question_id": "h7NUTYodVB8nmbLGaZ5idN", "question": "What type of area is shown?", "choices": ["urban", "desert", "forest", "rural"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000388975.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 302748, "question_id": "h7mNXPawoQUzgH5eJvyiaQ", "question": "What is the clothing all these surfers are wearing?", "choices": ["wetsuit", "floats", "dresses", "jeans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000302748.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91339, "question_id": "h9rUcfVgw9eFDQku2Qo3TD", "question": "What is the contents of the bottle shown here produced for?", "choices": ["laundry", "dishes", "drinking", "bathing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091339.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365539, "question_id": "hAW2jPfgHkWwyeVmQwEQK5", "question": "What animal might you find in the area to the left?", "choices": ["dog", "fish", "elephant", "bat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000365539.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420430, "question_id": "hBZGgYAFDwHvEMz65jQYbT", "question": "What is usually found in this room?", "choices": ["charcoal", "soap", "freezer", "arcade machine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420430.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 338150, "question_id": "hCnBX9ouXche2AwG2kH7Ac", "question": "What is the likely danger faced by this person?", "choices": ["tornado", "earthquake", "storm", "tsunami"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000338150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 184564, "question_id": "hE8a88Er3btWbKcT4TLwmc", "question": "Where was this vehicle stored?", "choices": ["boat slip", "carport", "garage", "hangar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000184564.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 255939, "question_id": "hFXypgsLBh5LPk4TPefkAa", "question": "The man in which color jacket will get in the tunnel first?", "choices": ["red", "white", "blue", "green"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000255939.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315506, "question_id": "hFawE9ASLUMdgLyGNgqttx", "question": "What are the two dogs fighting over?", "choices": ["tennis ball", "frisbee", "plate", "dog treats"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315506.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451100, "question_id": "hGFVMveMBGiS9Gx99T97kP", "question": "What type of animal is this?", "choices": ["stuffed", "domestic", "mammal", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451100.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 341936, "question_id": "hGPxAXHiBCpggEMXdDPnUf", "question": "What type of board is the man riding on?", "choices": ["skimboard", "wakeboard", "surfboard", "longboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000341936.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556153, "question_id": "hGafoAdRj6wc9VkeoH9TLZ", "question": "What is the woven floor covering underneath the electronics constructed from?", "choices": ["bamboo", "tatami", "cloth", "jute"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000556153.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 225821, "question_id": "hHKcLqqZnq9nVQuKiCdK68", "question": "What type of site is shown?", "choices": ["shopping", "natural", "historical", "modern"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000225821.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373438, "question_id": "hHf8KdJz4j5XMcV3shKN3L", "question": "What would cause the surfers to watch the waves rather than surf?", "choices": ["lighting", "temperature", "season", "weather"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373438.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 345259, "question_id": "hHkagnSqusjZmSpD2v3xnH", "question": "What does the long nose belong to?", "choices": ["anteater", "bear", "cat", "elephant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000345259.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 528237, "question_id": "hJ2P6NqzTqrGpssCDpaHA7", "question": "To what does this person listen?", "choices": ["static", "camera", "heart", "phone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000528237.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565685, "question_id": "hJVH4ciBox9SGGEfhC6Ce8", "question": "In which location might the lady shown here be?", "choices": ["arena", "hotel", "motel", "nursing facility"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000565685.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377247, "question_id": "hJXW9omaer6fSmykSue58s", "question": "Which one of these workers is most likely to use this truck?", "choices": ["teacher", "tailor", "potter", "construction"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000377247.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156331, "question_id": "hJZGnfYEnFmxU7hV3NJZnC", "question": "Why is the truck's door a different color than the truck?", "choices": ["rust", "in accident", "different style", "owner confused"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000156331.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158789, "question_id": "hK8yJsA66u2RgpE7a5TbwJ", "question": "The woman knows the man holding her how?", "choices": ["mother", "just met", "strangers", "intimately"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158789.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 331948, "question_id": "hL3MzpEd6k9bXvPQJscWwe", "question": "What is the person eating this pizza also likely doing?", "choices": ["browsing internet", "completing homework", "watching tv", "cleaning house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000331948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367895, "question_id": "hQF4wvY9cT3c56crDVH8EM", "question": "What region would this item be a rare find?", "choices": ["united states", "india", "antarctica", "brazil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367895.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566275, "question_id": "hR6gZE54wM6rR8XKCM23ST", "question": "What would normally be where the dog is?", "choices": ["person", "bird", "cat", "lamp"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566275.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262725, "question_id": "hRjEBaWV6g2iXTcVhYpWgo", "question": "What do the man and woman want to do with the large white object?", "choices": ["throw it", "paint it", "eat it", "sculpt it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262725.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390620, "question_id": "hT9aixNQsNS6K5w2Ro233G", "question": "The man looks most like what singer?", "choices": ["buzz osborne", "perry farrell", "taylor momsen", "biz markie"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000390620.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 575085, "question_id": "hTSpr5nK2FoeHB7PuG9HXt", "question": "What is in the water?", "choices": ["cat", "tree branch", "ladder", "people"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000575085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 521780, "question_id": "hToJWzGrwUdsV82wQbvSmy", "question": "What is the dog doing with the blue plate?", "choices": ["biting it", "attacking it", "smelling it", "licking it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000521780.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 418153, "question_id": "hTwcBXmJWST9As2bBLZWrd", "question": "What is the dog looking to score?", "choices": ["sandwich", "hamburger", "dog bone", "vitamins"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000418153.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 408097, "question_id": "hUgJC8SDGxT2eYywUsWiyw", "question": "What store would the child go to to get this food item?", "choices": ["nathan's", "dunkin donuts", "mcdonald's", "chipotle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000408097.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 151209, "question_id": "hUrWeUzapR7shsPbfWPMqK", "question": "These food items usually come in what shape?", "choices": ["triangle", "octagon", "round", "star"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000151209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369990, "question_id": "haSkEQB2cq4rBDkSZ2nJyJ", "question": "What material is the screen door in front of the cat constructed from?", "choices": ["iron", "copper", "steel", "aluminum"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369990.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273720, "question_id": "hbQFunzqbxRpCiJ67mSZg8", "question": "What is the function of the white foam?", "choices": ["hydrating", "cleaning", "whitening", "neutralizing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273720.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422317, "question_id": "hdm2n8bvwseXdYm9fXcVFS", "question": "The texture of the court is?", "choices": ["soil", "cement", "mud", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000422317.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192611, "question_id": "hfz8YbaaNXDcwNeJqKvsWf", "question": "What language used in this book?", "choices": ["french", "english", "spanish", "malay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192611.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 287283, "question_id": "hhJvXih2habbZGPWEa7pA8", "question": "What can be accessed from the small yellow object?", "choices": ["water", "electric", "assistance", "gas"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000287283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318140, "question_id": "hhqWdoMDuWr6EF6VoekHxN", "question": "What Australian city is this airline based out of?", "choices": ["canberra", "melbourne", "sydney", "brisbane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000318140.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466439, "question_id": "hitfWeJ5QB82uXeaS4zcdq", "question": "The appliances are probably what material?", "choices": ["cloth", "ceramic", "slate", "steel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466439.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 386173, "question_id": "hjL24QEoex3ZXyka3Ghw65", "question": "Where can you find this street?", "choices": ["norway", "italy", "finland", "denmark"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000386173.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254707, "question_id": "hkf9emwT4uV5soTzCuGVDT", "question": "What is a famous store that serves things like this?", "choices": ["wayside", "shelter", "subway", "station"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117484, "question_id": "hkv7Q9YP2FZZ5j4tcGAQJ7", "question": "What type of transportation is absent in the signs here?", "choices": ["cycling", "running", "walking", "bussing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000117484.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349626, "question_id": "hok7CGjYNxQ76yqWuxJwzP", "question": "Why are the logs burning here?", "choices": ["heating food", "destroy evidence", "heating neighbors", "provides warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515596, "question_id": "hpbkrCnHtst5mEaTucZWW6", "question": "Which food has the least amount on the plate?", "choices": ["potato", "avocado", "tomato", "carrot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515596.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24065, "question_id": "hphpZEW3HMsjU6q8Gh9cYe", "question": "What season is happening here?", "choices": ["fall", "winter", "spring", "summer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112367, "question_id": "hrfRWraCfmaBj2MnMuiDwb", "question": "What is the dog doing?", "choices": ["racing", "watching", "herding", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112367.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123428, "question_id": "hsZkPvjQ48G7hJ4srRhMTz", "question": "What body part is taller than the rest of the animals around the central bird?", "choices": ["legs", "mouth", "beak", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123428.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118675, "question_id": "ht8gXA4gyvJn7ebiLkZwWF", "question": "Why is he standing that way?", "choices": ["is falling", "is lost", "is tired", "maintaining balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118675.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334443, "question_id": "htVX782umwiYLd9oKKVoVU", "question": "What metropolitan area does this company serve?", "choices": ["denver", "valparaiso", "miami", "el paso"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334443.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 452357, "question_id": "hvkZjXhJF5bG7vnqtURALe", "question": "What does the skateboarder have on?", "choices": ["tie", "hat", "armor", "clown nose"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000452357.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 403426, "question_id": "hwfPt8qrPxB8BQUk3GygS9", "question": "Who can go forward here at this moment?", "choices": ["ticket holders", "anyone", "taxi takers", "no one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000403426.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555846, "question_id": "hwnqUJkanCFcpS5WhKhiF4", "question": "What are these birds standing on?", "choices": ["street pole", "antenna", "cage", "fence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000555846.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6377, "question_id": "hwqY5dehCy3AnbyPXtXJZ3", "question": "What type of model is the dark car across the street?", "choices": ["sedan", "taxi", "minivan", "sports car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 105203, "question_id": "hyM3EgWgoYgAts3Xv6CoY5", "question": "What does the man want to do with the red object?", "choices": ["taste it", "pocket it", "throw it", "eat it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000105203.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320781, "question_id": "i2aVRCiiuqws8kXD2nLg7L", "question": "What job would someone have who would be allowed to make a hose connection to this item?", "choices": ["computer programmer", "mom", "fireman", "coast guard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000320781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 193762, "question_id": "i4sWAnTKxHBP2GZPmBsN7J", "question": "What does the light coming from the signal tell people?", "choices": ["stop", "slow", "turn", "go faster"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000193762.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158150, "question_id": "i6Z9Fb7oPmHq2MSzM4JJWJ", "question": "What would be needed to make this toilet work?", "choices": ["gravity", "plumbing", "solar panels", "electricity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158150.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 320860, "question_id": "i7ctCL2ZiU8fc6dKzK4i4v", "question": "How is the man balancing like that?", "choices": ["special effects", "wind", "ropes", "strong"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000320860.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562459, "question_id": "i7sgFRFGGCKsjhH9BUqKmB", "question": "Where is this garden?", "choices": ["hospital", "park", "school", "yard"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000562459.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 101375, "question_id": "i9QuWfcjjsxVXQ3yArxVfV", "question": "What kind of farming is this?", "choices": ["vertical", "food forest", "raised bed", "aquaponics"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000101375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161864, "question_id": "i9nY6ir8FVP5fPdT756UnV", "question": "What type of animals are seen in the photo?", "choices": ["bear", "hyena", "dog", "gorilla"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490249, "question_id": "iAuB8jMenDvYANgmKWpd54", "question": "What is in the direction the animal is looking?", "choices": ["helheim", "devil's sinkhole", "tartarus", "ursa major"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172118, "question_id": "iBAeBbKAr6HQJbChigZqxA", "question": "What is the colorful item on top of?", "choices": ["flower pot", "box", "refrigerator", "bed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 114948, "question_id": "iBMFbbrARFMKZhwgXHfMAd", "question": "This airplane is following up on what type of disaster?", "choices": ["earthquake", "tornado", "pollution", "terrorist attack"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000114948.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550668, "question_id": "iBx3Y6upMHbcKjuypnepSf", "question": "What would be the result if this advice was followed?", "choices": ["communism", "uniformity", "peace", "conflict"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550668.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339726, "question_id": "iCMX66AkUetbQRQDbbb5Uf", "question": "What number is on the front of the bus?", "choices": ["652", "387", "908", "042"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 348153, "question_id": "iCmjBdZienZbor2UpTyqFG", "question": "What might be causing the white smoke to come from the tent?", "choices": ["water", "piping", "fire", "minerals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000348153.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 63106, "question_id": "iDMBp76GXow85XXHY6NNBy", "question": "Where are the cows being shown off to an audience?", "choices": ["country fair", "school", "park", "amusement park"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000063106.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 209402, "question_id": "iDs8rFqkYvhqr74vJjGjbQ", "question": "What age group does this person belong to?", "choices": ["30-45", "50-65", "10-25", "20-35"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000209402.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 274623, "question_id": "iEsP9p4KnHkbx7dNLRhafW", "question": "What is the person pictured above doing?", "choices": ["snowboarding", "skating", "gliding", "jumping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000274623.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507860, "question_id": "iGrUCCtkCSzjAQScaHemJi", "question": "What is inside the cup?", "choices": ["water", "nuts", "nothing", "milk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507860.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81863, "question_id": "iH3WBBKYPV65oatEgKPSyU", "question": "Which object looks most similar to this cat's eyes?", "choices": ["marble", "playing cards", "cardboard box", "mailbox"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438951, "question_id": "iJTZR9tQHNRR5UtHZo8ws6", "question": "What are the long streamers called?", "choices": ["nose", "wings", "tail", "belly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438951.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325735, "question_id": "iJbGPCRKDuQX2KVLweRQDR", "question": "What type of kitchen is shown?", "choices": ["commercial", "hospital", "residential", "mobile"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000325735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 417435, "question_id": "iM4QxMfkqeckdx9jhy3Va7", "question": "How often can the white food be harvested?", "choices": ["triennially", "annually", "biannually", "quarterly"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000417435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548193, "question_id": "iMDTgQcrLygVdTrSL7WEgf", "question": "What is the complete first word on his shirt?", "choices": ["snowboard", "showboard", "slowboard", "stowboard"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548193.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494929, "question_id": "iPxANWq54uzNPSXKSJ8h3b", "question": "How are the top and bottom of the largest item here similar?", "choices": ["shape", "color", "height", "length"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000494929.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79148, "question_id": "iXbSoevbYAKACH3uC6e9n9", "question": "The food pictured is part of which cultural cuisine?", "choices": ["japanese", "vietnamese", "chinese", "italian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000079148.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456474, "question_id": "iYVQsL3N3YjzqeGnCJFhnU", "question": "What is the language below the English warning?", "choices": ["latin", "greek", "french", "german"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 479059, "question_id": "iZzcxnNnJ6YCAZarZxPhDe", "question": "What is this sink used for?", "choices": ["develop photos", "wash hands", "clean dishes", "bathe dogs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000479059.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356234, "question_id": "ibZnPmwuskJYgoZRmzaAiM", "question": "What animal does the above doll resemble?", "choices": ["polar bear", "dog", "none", "plain bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356234.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131671, "question_id": "idTLgPoGnLyqEEFFzJRJSK", "question": "How many calories are in a regular slice of pizza?", "choices": ["100", "300", "250", "800"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000131671.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 172130, "question_id": "idc598SQ3QJXiYRF3ypMzM", "question": "What is the nature of this animal's fur?", "choices": ["striped", "bald", "nonexistent", "monocolored"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000172130.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 477541, "question_id": "iemkTy85nPxHKyt93XAJfs", "question": "What activity occurs in the alcove here?", "choices": ["showering", "sleeping", "bathing", "brushing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000477541.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263645, "question_id": "iepGwxwmLodUnfDBrr48sP", "question": "What did the bird ruin on the electronic device?", "choices": ["motion sensor", "power button", "power chord", "scroll wheel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000263645.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51670, "question_id": "ifvtdGHFsyTiJD6SRcetHG", "question": "In which Asian country is this bathroom located?", "choices": ["south korea", "china", "vietnam", "japan"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051670.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76783, "question_id": "ik4ALYUi62ivwthhyz4N2A", "question": "Where are these animals located?", "choices": ["desert", "forest", "zoo", "beach"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 300832, "question_id": "ik8Zgn3ktqfdUDUzyRz42C", "question": "What does he hope will be the outcome of this game?", "choices": ["injury", "loss", "win", "tie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000300832.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 67421, "question_id": "ikMAeAC9VyoLHMYmuUCxdi", "question": "What type of signs are on the clocks?", "choices": ["directional", "brand", "warning", "price"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000067421.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 465959, "question_id": "ikuqPVr25BWicRncqFcyUw", "question": "What location is this fire hydrant?", "choices": ["driveway", "parking lot", "median", "curbside"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000465959.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445052, "question_id": "ikwYLQ2pZh3zZERdG2UaNG", "question": "What is the comb and watch sitting on top of?", "choices": ["couch", "bed", "sink", "dresser"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000445052.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272226, "question_id": "inESbbfsRdnBFWzD5vLPBG", "question": "How many different ways is the baby putting the food in her mouth?", "choices": ["two", "five", "one", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272226.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 32749, "question_id": "iqYdmUkJCrwekc3bP9ir96", "question": "What reason could the yellow object be where it is?", "choices": ["sentience", "blizzard", "eruption", "wind"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000032749.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 263421, "question_id": "irZWaYyFL94rpfnnzCVyEe", "question": "What type of sink is shown?", "choices": ["bathroom", "kitchen", "workroom", "commercial"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000263421.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451816, "question_id": "isdpwWg36ZmXsiH5n55Yex", "question": "What type of clothing is the person pictured above wearing?", "choices": ["casual", "semi-casual", "official", "costumes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451816.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 249642, "question_id": "iuQGidCtmHvqHiTVMYD62H", "question": "What does the train run on?", "choices": ["coal", "steam", "electricity", "diesel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000249642.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371152, "question_id": "iuZwrAciyC54pppL8ynoYU", "question": "What are the people looking at?", "choices": ["giraffes", "signage", "sand", "tour guide"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000371152.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61581, "question_id": "iv8RpKi6MmxgTSRJT4VaH6", "question": "What type feet does this animal possess?", "choices": ["hooves", "claws", "talons", "flippers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061581.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 237631, "question_id": "ivERbnSHWg34aEgLamApk9", "question": "What is the signage indicative of?", "choices": ["intersecting roads", "someone's home", "danger ahead", "garage sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000237631.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198896, "question_id": "ivj3gdAvBjiGMfr3o6cjcs", "question": "Where is this station?", "choices": ["suburb", "city", "country", "university grounds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198896.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473947, "question_id": "ivjPdESybWbBRsknqE4JDM", "question": "Why is the man in the helmet holding a sword by his side?", "choices": ["fashion", "offense", "defense", "ceremony"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104672, "question_id": "ivzBdrjVampsfELFVTvhUx", "question": "What is the lowest part of the animal's head here that is visible?", "choices": ["nose", "eyes", "ears", "forehead"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104672.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 59645, "question_id": "iyHF8bcDFYPMM666ZLLD6S", "question": "How are these devices powered?", "choices": ["electric", "coal", "sun", "battery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000059645.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339756, "question_id": "iyvXyyWdu2dk7PhoPwKzpn", "question": "The loop means they can wear these without doing what?", "choices": ["washing", "tying", "ironing", "sewing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339756.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 18696, "question_id": "j5a7qsL68bZR8MBhNPLzWN", "question": "What action is about to be taken with the scissors?", "choices": ["poking", "cutting", "slicing", "stabbing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000018696.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295238, "question_id": "j5oimpZ2vmTzugwHbZpUfZ", "question": "What condition is the road in?", "choices": ["disrepair", "dirt", "pristine", "under construction"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000295238.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 361570, "question_id": "j6SgGnMq2KLFymBAp7HVfK", "question": "Why is the sign in Hindi?", "choices": ["in china", "in india", "for amusement", "mistake"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000361570.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 353020, "question_id": "j7U7yYBpCCZ43r9jLLck9M", "question": "What type of environment is this train most likely in?", "choices": ["forest", "marine", "tundra", "desert"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000353020.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 226052, "question_id": "j7qC3vnzrAUiF85PiHcdJU", "question": "What is the importance of the following equipment?", "choices": ["landmark", "decoration", "water point", "extinguish fire-point"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000226052.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 95369, "question_id": "j8ZQmdXf8qqW3h2LqNkCkV", "question": "What is the area to the left of the sink used for?", "choices": ["showering", "eating", "cooking", "storing clothes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000095369.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569181, "question_id": "j8oMKD9TMgHJYxwQPLoYCm", "question": "How many people sleep here?", "choices": ["one", "four", "two", "eight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223969, "question_id": "j9VVYtedsYU7wA9rbM2jef", "question": "Which part of animals seen here is likely to be used by this person first?", "choices": ["hoofs", "hair/wool", "tails", "horns"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223969.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551628, "question_id": "j9WJ27GZPpVxgREF2YesFj", "question": "What can be done with this appliance?", "choices": ["call", "clean", "view", "blend"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551628.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 175243, "question_id": "jBa4eXMLMhwJDxkhtthkHr", "question": "What natural structure is this?", "choices": ["cave", "tree", "ocean", "mountain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000175243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 195393, "question_id": "jFLAwxpHQ5HzVCr4raguwU", "question": "Where is this game being played?", "choices": ["mud", "court", "beach", "field"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000195393.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99262, "question_id": "jGVAJ5MSRj4Mq2szwZ4WD2", "question": "What type of room is this?", "choices": ["hotel", "exam", "escape", "conference"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099262.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 486279, "question_id": "jGs4uQqQqoJEXhuEGSq8cq", "question": "What is the profession of the person that will prepare this item?", "choices": ["painter", "janitor", "chef", "teacher"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000486279.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414265, "question_id": "jJM6S99WwxT7k3Y7oPaapD", "question": "Elevated people here want to take what of the Giraffes?", "choices": ["ears", "food", "photos", "horns"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 72388, "question_id": "jJcBKUnGLZzZWMAVxwd2LP", "question": "What kind of weather is the woman most likely selling accessories for?", "choices": ["hail", "heat", "snow", "rain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000072388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387612, "question_id": "jK6vLNMrctejTnUN9qfVky", "question": "What is the item on the bird's leg for?", "choices": ["medication", "decoration", "identification", "safety"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387612.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 178132, "question_id": "jLA9Vk6bKB5nLVc4qHtUNf", "question": "What type of transportation is this?", "choices": ["rail", "air", "road", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000178132.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299835, "question_id": "jLdvkRZ4yUpuuFiQv6pVEw", "question": "What food can be made from this animal's meat?", "choices": ["fish cakes", "pork rinds", "beef taco", "salad"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 94896, "question_id": "jLogtGPCcJkXy5RGRnveL5", "question": "What might someone need to do if they are getting ice cream out of this appliance?", "choices": ["reach up", "bend down", "open left", "open right"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000094896.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104072, "question_id": "jQ8s9Cv5W83SBzchWnDdKx", "question": "How is this food prepared?", "choices": ["frozen", "boiled", "baked", "fried"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29258, "question_id": "jRaEZE2t3X9TaX7ghQXsts", "question": "What is the purpose of the wooden apparatus?", "choices": ["flatten pizza", "serve pizza", "grill pizza", "transport pizza"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000029258.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 33273, "question_id": "jSA9r5ECeWbz6Pui55tLLX", "question": "What place is known for this type of food?", "choices": ["olive garden", "nathan's", "mcdonald's", "subway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000033273.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515116, "question_id": "jTd8CG6gU9z5ZFcJm9ALBg", "question": "What religion do these arches belong to?", "choices": ["islam", "judaism", "buddhism", "christianity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000515116.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169037, "question_id": "jTw6vfeFqsH7vekaMyAWBb", "question": "What is hanging up on the wall?", "choices": ["underwear", "socks", "towel", "shorts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169037.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283544, "question_id": "jTzSxQcb3NztPd5wR2aHAX", "question": "What type of phone is the woman using?", "choices": ["corded", "cordless", "rotary", "pay"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283544.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552418, "question_id": "jbjnmBLm6SE8Ze4hSX88zt", "question": "During which season was this moose walking along the road?", "choices": ["spring", "winter", "fall", "summer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552418.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36008, "question_id": "jc87afTorBrfRJUqoG2NSA", "question": "What is a word that can describe this group of animals relaxing like this?", "choices": ["dazzle", "stampede", "school", "flock"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 459013, "question_id": "jcAsTa9yWuEmX6RSnPj6BJ", "question": "What will likely happen to the hair on this animal?", "choices": ["shorn", "tyedied", "grow forever", "braided"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000459013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 18696, "question_id": "jcUA2wa9ZeJXQZxPnqvh5K", "question": "What activity is the person performing?", "choices": ["cutting", "slicing", "stabbing", "splicing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000018696.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 562785, "question_id": "jeHcqWbuw57rGEG7ysSvxK", "question": "Which object would typically have water in it?", "choices": ["wood floor", "saw", "doorway", "toilet bowl"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000562785.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 162008, "question_id": "jeMrGQgXVZVyvUasbWAMCE", "question": "What is the tallest living thing in the image?", "choices": ["zebra", "trees", "giraffes", "elephant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000162008.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102026, "question_id": "jeNVvuQpRMvYctTaWbTbZd", "question": "What type of food must cats eat?", "choices": ["viruses", "bugs", "animal protein", "vegetables"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102026.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 250861, "question_id": "jhHVwyrpvQbsij8RSRGrXf", "question": "What do the coils seen here make it easier to do?", "choices": ["heat food", "open book", "prevent pregnancy", "vape"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000250861.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 434889, "question_id": "jkboVYwZ4SLc5r2MDa49Wa", "question": "Why has he covered his head?", "choices": ["protection", "uniform", "costume", "cleanliness"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000434889.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 161864, "question_id": "joSU3yDyjf6ceH97yEHBfX", "question": "What type of animals are shown?", "choices": ["hamster", "bear", "chicken", "rooster"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000161864.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546735, "question_id": "jpQrrzgwFsgQcXfr5iQSr9", "question": "Which one of these people groups have historically lived alongside this animal?", "choices": ["malay", "aztec", "inuit", "tamil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519881, "question_id": "jsCwMy3D3g9qyubHJzEAM7", "question": "In film and television this animal is often depicted as eating what?", "choices": ["bark", "ants", "fire", "honey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519881.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 498452, "question_id": "jtvRkF6Usm7KP8TsYoRatv", "question": "What is the duration for hand wash to attain perfection?", "choices": ["10secs", "20secs", "30secs", "40secs"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000498452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517358, "question_id": "juLU7w3dMZFWWa5AKrQiH8", "question": "What can be done with this device?", "choices": ["open", "turn on", "turn over", "close"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000517358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 391386, "question_id": "jwGFuAnMgPKVw3s7D28mHD", "question": "Which country is the subway station in?", "choices": ["turkey", "ukraine", "romania", "bulgaria"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000391386.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 11621, "question_id": "jxCwu8CH4ZyWMzYp9ZzLma", "question": "What's on the plate?", "choices": ["croissant", "bagel", "hockey puck", "chocolate donut"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000011621.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204756, "question_id": "jxFQ3uyB4vHQ3w4jv7oZ6z", "question": "What type of light is shown?", "choices": ["spot", "moon", "sun", "flash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204756.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 280717, "question_id": "jxQGwNQR7kF9ztN94TSUu9", "question": "Which food ingredient has a creamy texture?", "choices": ["tomato", "lamb", "bread", "eggplant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000280717.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355885, "question_id": "jzV5rB2bbyVL5UPXMqrAwX", "question": "What is the man throwing in the air?", "choices": ["puck", "hat", "ball", "frisbee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355885.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147781, "question_id": "jzmjCpjoUfemEo8c2XfgzC", "question": "What is the person in the white hat here taking?", "choices": ["picture", "nothing", "rock", "rest"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147781.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 259413, "question_id": "jzoyNjyGJWfzYsTeuBgfzM", "question": "What diet does the person who placed the eating animals sticker most likely observe?", "choices": ["keto", "atkin's diet", "sugar free", "vegetarian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000259413.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446370, "question_id": "k6mtTMLjAKPZ6GEnur7ZYF", "question": "What will turn on in a few hours?", "choices": ["radio", "television", "light", "computer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446370.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 182831, "question_id": "k74LZsgVegfxHk3sGKCHNt", "question": "What state is next to the state where this Amtrak train is from?", "choices": ["arizona", "washington", "new york", "florida"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000182831.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179028, "question_id": "k7w9qVw67RTKJvhRbAbhRw", "question": "Why does the girl have her head covered?", "choices": ["religion", "fashion", "protection", "warmth"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179028.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177176, "question_id": "k8Wxf28Pb8QaZZ9ZN5rFij", "question": "What is the man doing with his finger?", "choices": ["eating", "pointing", "scratching nose", "stirring drink"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000177176.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 43623, "question_id": "k8yac4HxSNZbiAgF2E5mMt", "question": "What does the yellow object connect to underground?", "choices": ["water", "fire", "power lines", "electricity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000043623.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 500615, "question_id": "k9DZGsLeZAmPFrxtqQAoAf", "question": "The animal here is curious about what?", "choices": ["tree", "cage", "camera", "it's calf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000500615.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 356346, "question_id": "kDX88EEunrsosGvwyyc32W", "question": "What are the two sheep on the side trying to get from the big sheep's body?", "choices": ["milk", "pee", "bugs", "meat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000356346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462064, "question_id": "kDoSgQsTrGoZpSYyFvdhJq", "question": "What birds are flying in the air?", "choices": ["cardinal", "raven", "crow", "seagull"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000462064.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463910, "question_id": "kGyRr5JEQqEgeVtguN8Mj9", "question": "This man looks most like what character from The Wire?", "choices": ["beadie russell", "omar", "stringer bell", "nick sobotka"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000463910.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 350705, "question_id": "kKoUNK76SrDCfkc22EU6rP", "question": "How many types used to throw the disc?", "choices": ["nine", "two", "four", "three"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000350705.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 432585, "question_id": "kKoqSpPzRBYta6aBereSY8", "question": "What number is above this woman's head?", "choices": ["nine", "seven", "eight", "one"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000432585.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 231303, "question_id": "kL9abjoTmnDXXDX32CgAkF", "question": "What is the dog's nose against?", "choices": ["wall", "fence", "window", "bowl"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000231303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 76231, "question_id": "kNTJRV9naDpkcHfRhmqTsY", "question": "What is this type of counter called?", "choices": ["glass", "tile", "granite", "plastic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000076231.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 581305, "question_id": "kP7ZWBMCDzkMYPz8spW4tj", "question": "Where is this cat located?", "choices": ["hospital", "barn", "field", "home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000581305.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 219234, "question_id": "kPBRZxm9md6Pyvx5NPeuix", "question": "What type of building is behind the stop sign?", "choices": ["clothing store", "auto store", "restaurant", "bank"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000219234.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142292, "question_id": "kPC3ddc2cNfdRzrJgzJ5nq", "question": "What technique was used to make the table covering?", "choices": ["weaving", "needlepoint", "macrame", "crochet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000142292.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135629, "question_id": "kPYqWBwqnD8Vm7VRwFoTpi", "question": "When is this kind of plane commonly used?", "choices": ["party", "war", "public transport", "event"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135629.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262602, "question_id": "kQ3Dib2pDaskeLMAAhJpDL", "question": "What is the white area shown?", "choices": ["snow", "cloud", "water", "sand"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000262602.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36350, "question_id": "kQEfqDtFYSNoG5dTzRUWZ5", "question": "What kind of light bulb is being used for the bathroom?", "choices": ["hps", "incandescent", "led", "metal halide"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036350.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299078, "question_id": "kSVydiD6WqMkUgLHP28vfu", "question": "This city is often compared to which one of these cities?", "choices": ["new orleans", "helsinki", "las vegas", "houston"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000299078.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517157, "question_id": "kT4Q7M42t8YAaVHy5yoLpx", "question": "What country is this bus part of?", "choices": ["spain", "norway", "uk", "italy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517157.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 297340, "question_id": "kT5i9yGiBCdRDwU3xbLqVo", "question": "What makes the bike sign funny?", "choices": ["it's orientation", "color", "font", "rider"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000297340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 66605, "question_id": "kTDXBsbxwYKeWy3LQRUhCE", "question": "The contents of this vehicle have what property?", "choices": ["frozen", "edible", "live", "flammable"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000066605.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385058, "question_id": "kTHvUyVVnHRcARWHjpXgEZ", "question": "What is on the end of the item in this person's mouth?", "choices": ["bacon", "charcoal", "bristles", "lollipop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385058.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411452, "question_id": "kWhB28GMCxFzcQXTuGWjyG", "question": "What type of animals are these?", "choices": ["stuffed", "bird", "reptile", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411452.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 217277, "question_id": "kXEscxuc94sx3NoqACQRCn", "question": "What type of lighting is the owner trying to create?", "choices": ["decorative", "task", "accent", "ambient"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000217277.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 533340, "question_id": "kXFkmrc77rVBudVTPzPaV7", "question": "What numerical information is shown?", "choices": ["score", "time", "speed", "temperature"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000533340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 242842, "question_id": "kXXm95wy6G8VLCPNNyofLz", "question": "What word is on the sign?", "choices": ["stop", "pressure", "yield", "way"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000242842.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 269649, "question_id": "kZjLWsMWKbLwGu9hkYEAto", "question": "What caused the lines in the sign?", "choices": ["marker", "fire", "rain", "acid"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000269649.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 40340, "question_id": "kaF5smgkwm22fargCfYVRm", "question": "What company is known for selling the item that is located in the glass with the spoon?", "choices": ["meineke", "home depot", "carvel", "best buy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000040340.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 255127, "question_id": "kaivp4RpXiGhk4cfRehWrC", "question": "What type of metal is being used to create the theme for both structures?", "choices": ["bronze", "gold", "titanium", "silver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000255127.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 488579, "question_id": "kaviKh7HKEV9DseBzMGJx4", "question": "The software is what type of image editing program?", "choices": ["3-d", "raster", "vector", "animation"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000488579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 480426, "question_id": "kcQQY2tKFMkXfBnCNjbsUE", "question": "How many legs would the animal have if their legs suddenly doubled?", "choices": ["two", "twelve", "eight", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000480426.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 457551, "question_id": "kcjFB2JCCmsL8mB55EPfKF", "question": "What type of activity is this with the dog?", "choices": ["kite surfing", "fishing", "dog fishing", "dog surfing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000457551.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 340541, "question_id": "kfSo39t4prZy5RVZGPj9gC", "question": "What type of parking is shown?", "choices": ["street", "diagonal", "valet", "lot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000340541.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254023, "question_id": "kfkadwyt5fFSxoB5vmkFqC", "question": "What seasoning is being used?", "choices": ["old bay", "oregano", "pepper", "salt"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 204214, "question_id": "kfvijw55fCK5sHspuJ8Yjs", "question": "What part of the dog body is the tool in the dog's mouth used for?", "choices": ["fur", "nose", "paws", "eyes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000204214.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 278327, "question_id": "ki3m7ESFN5Ueu2du5cqiiE", "question": "What is the bird shown here seeking?", "choices": ["eggs", "nectar", "mirror", "mate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000278327.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315167, "question_id": "ki97AkjQNDwXaiUZABd76v", "question": "What is the snow partially covering?", "choices": ["fire hydrant", "cat", "chicken leg", "dog bone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000315167.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295072, "question_id": "kk6dqrjdkPDtwGco5jJiPF", "question": "What item is artificially colored?", "choices": ["sky", "tree", "grass", "hydrant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000295072.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317558, "question_id": "kopV9QMNP23WqRQHT5c6vM", "question": "What is the name of this room in a building?", "choices": ["reception", "restroom", "lounge", "kitchen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317558.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 274119, "question_id": "krAvC9VVKebciTWjctWKmp", "question": "What is the highest governing body of this sport?", "choices": ["fifa", "nba", "nfl", "ncaa"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000274119.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 67346, "question_id": "krXirZhv42UXJNRSDDTcMi", "question": "What item here is used to cut?", "choices": ["knife", "saw", "meat cleaver", "scissors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000067346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86347, "question_id": "ksJmDkfX4ePiFsr3fU8Y3K", "question": "What is the flying pattern the planes are flying in usually called?", "choices": ["formation", "exhibit", "discriminate", "unified"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275475, "question_id": "ksrKGdFDaTjvExX79tuEZS", "question": "Who likely uses this bus most frequently?", "choices": ["prisoners", "students", "soldiers", "tourists"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000275475.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 127478, "question_id": "kt7aQLvDm8h7faR2fwabyo", "question": "What is the child holding?", "choices": ["spoon", "tooth brush", "pen", "tooth paste"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000127478.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485641, "question_id": "ktKWVjMmYf5GEKsDiJT92k", "question": "What seems to be missing from this popular dish?", "choices": ["ketchup", "cheese", "olives", "basil"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485641.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 514560, "question_id": "ku7utdkoToiQM99JHYEC8e", "question": "Someone with this type of key chain probably likes what type of foods?", "choices": ["meats", "veggies", "seafood", "pastries"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000514560.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 163915, "question_id": "kubqSxU7zySB3iU4HCLXuo", "question": "What is the only thing in the photo that is to scale?", "choices": ["barge", "boat", "buoy", "water"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000163915.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 179061, "question_id": "kv2b9mtNhcw47BBFKdW9aA", "question": "Who runs this bus?", "choices": ["daycare", "municipality", "retirement community", "school district"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000179061.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 507376, "question_id": "kyNG2dmMWoRRPy7dGUaCBD", "question": "Which item in the plate probably has the most flavor?", "choices": ["brown", "green", "white", "silver"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000507376.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484743, "question_id": "m23LQ4AMDzceJJ6nKXnbGx", "question": "What kind of allergy will prevent someone from consuming this dish?", "choices": ["fish allergy", "milk allergy", "egg allergy", "nut allergy"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000484743.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396709, "question_id": "m4kkienGMaFmjp37rCWMop", "question": "What is he doing?", "choices": ["controlling display", "resting", "eating lunch", "eating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000396709.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362767, "question_id": "m5ciToKgmduVivtHqghwxY", "question": "What brand are the skateboarder's shoes?", "choices": ["new balance", "reebok", "adidas", "nike"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104632, "question_id": "mAKNYv5YxkY9Yi8ge93BVb", "question": "What season of the year is it most likely to be?", "choices": ["summer-fall", "winter-spring", "fall-winter", "spring-summer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104632.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188874, "question_id": "mBTBpW5sNtgd6TPhsRrjVs", "question": "What is the use for the structure standing in the dessert with the broken door?", "choices": ["phone booth", "storage", "bathroom", "music"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188874.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 402692, "question_id": "mCGMVvMUXBYM7dSZH8Dcdz", "question": "What looks like it was built in the tree to the left of the dog?", "choices": ["flag pole", "birds nest", "treehouse", "exercise bar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000402692.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221263, "question_id": "mCdPp86BJj9NRKmFubZ54b", "question": "What are the turtles sharing?", "choices": ["drink", "trash", "bed", "food"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221263.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317760, "question_id": "mCpekG5Dy6vnxWWj6x6rJk", "question": "What sport does the glove belong to?", "choices": ["football", "hockey", "basketball", "baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420439, "question_id": "mDVKqHrPxm3vjMMqVNNgwg", "question": "What type of kitchen is shown?", "choices": ["hospital", "mobile", "commercial", "residential"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420439.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198954, "question_id": "mGSj6mhrPoWFyn9jeA65fz", "question": "In which country is this bicycle located?", "choices": ["united states", "australia", "canada", "england"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198954.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311037, "question_id": "mGv8q9tBVAH2cF4vC2Zd8q", "question": "The green items can provide what?", "choices": ["blood", "wings", "shade", "light"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311037.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462218, "question_id": "mHhbUi72UE7K3XissU7kUN", "question": "This animal's ancestors in the 13th century were persecuted by what group?", "choices": ["dinosaurs", "catholic church", "depeche mode", "hare krishna"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000462218.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 23835, "question_id": "mJy6ZAvgG5WNQTfPSE9oLg", "question": "The people are all wearing what?", "choices": ["jackets", "potato sacks", "boxes", "crowns"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000023835.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 567753, "question_id": "mNCLiiL6UdEQFNB8YqBJzr", "question": "What is this type of clothing called?", "choices": ["outerwear", "underwear", "footwear", "accessory"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000567753.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123735, "question_id": "mQGZkypHvDTb5gKaFcczRP", "question": "What is the stitch on the wall artwork called?", "choices": ["embroidery", "crochet", "purl", "knit"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 81007, "question_id": "mQnjoxpb9nt6FYAfc9HcnH", "question": "What area lies beyond the gray door?", "choices": ["bathroom", "bedroom", "storage closet", "apartment exterior"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000081007.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24676, "question_id": "mSLWQsFsiAjULkpmM2fBA2", "question": "How is this train powered?", "choices": ["electric", "battery", "steam", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024676.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 170067, "question_id": "mSWi5Rmju4rJmdVcU5EoyE", "question": "What could someone use to understand the foreign language of the movie?", "choices": ["subtitles", "remote control", "google", "translator"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000170067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29255, "question_id": "mSdeaDmWHsAbkQi5peZude", "question": "What appliance is the plant coming out of?", "choices": ["dishwasher", "dryer", "toilet", "oven"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029255.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157925, "question_id": "mTeUR97fhMSQVhoBMxkZcc", "question": "What might the bird here find?", "choices": ["horns", "mutton", "nothing", "insect"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157925.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 30367, "question_id": "mU6f4pmww2YCafzNT3ddLC", "question": "What is the bear's expression?", "choices": ["sad", "content", "happy", "calm"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000030367.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 281726, "question_id": "mUn59YjLKDrcrU9AYY8AvL", "question": "At this place can we found this kind of environment?", "choices": ["city", "valley", "mountain tops", "garden"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000281726.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 573569, "question_id": "mVoXZGGVTebeWdqPBBAtgt", "question": "What type of material is being used to wrap the object?", "choices": ["nylon", "terrycloth", "polyester", "cheesecloth"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000573569.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290970, "question_id": "mW8zPC6JHwAXuR3ArgUhMJ", "question": "What kind of pants does the man have on?", "choices": ["shorts", "tights", "khaki", "jeans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000290970.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 155246, "question_id": "mXCjWuJGAi3TLVzMExgWxe", "question": "What person would utilize the red object?", "choices": ["doctor", "firefighter", "dog", "lawyer"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000155246.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355358, "question_id": "mXwQC3xHUYgZrSQGowgZ7M", "question": "Who owns the couch where this person sleeps most likely?", "choices": ["friend", "no one", "parents", "him"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355358.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364144, "question_id": "mYZ8QGF26nshaSQcCnAxxK", "question": "Which reptile is behind the cat?", "choices": ["alligator", "lizard", "snake", "turtle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000364144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 518111, "question_id": "mYgLkn9Gi7XHtg7rCcGpSZ", "question": "The shop seen here provides clothing worn on which body part?", "choices": ["head", "none", "hands", "feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000518111.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41045, "question_id": "mYkgkwmfWkxtdPWxtpY5t6", "question": "Which restaurant title is related to what the man is doing?", "choices": ["concierge", "garbage man", "dishwasher", "server"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000041045.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 5771, "question_id": "mYr4fXZBo3Q3CrFjydSgeD", "question": "What hour has recently passed?", "choices": ["1200", "500", "800", "200"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000005771.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 176204, "question_id": "mZJyZyKhRamT4fBzW6SxjM", "question": "Why is there a window above the door?", "choices": ["prevents burglary", "allows airflow", "more sunlight", "emergency exit"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000176204.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 138178, "question_id": "mZimjCrT7s42d5yhjMd5WA", "question": "What is next to the cat?", "choices": ["leeches", "stuffed animal", "meat", "apple"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000138178.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 373553, "question_id": "mce6GHCt4rwhmaaBNa2jnp", "question": "The player that has the white pants and is standing is trying to do make what play?", "choices": ["homerun", "spare", "birdie", "penalty kick"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000373553.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 314230, "question_id": "meDQJYzxrBdsNP9UNbLDtG", "question": "The animal here is doing what?", "choices": ["hibernating", "feeding", "doing nails", "resting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000314230.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 414626, "question_id": "mfbDSzcXUQyjKY74dmWTYf", "question": "This structure is located on what area of land?", "choices": ["outer space", "in air", "inland", "coast"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000414626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187530, "question_id": "mfz4PJifW8pgd3RvWx52x4", "question": "What is this furniture designed for?", "choices": ["sleeping", "eating", "collections", "seating"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000187530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 340852, "question_id": "mg3ny3o4PnbBReMECdgDHw", "question": "How would you describe the air?", "choices": ["foggy", "high", "clear", "low"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000340852.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362950, "question_id": "mgVbinzUKCJXDM5HmRArUb", "question": "Is this German shepherd breed dog?", "choices": ["maybe", "false", "yes", "no"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362950.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 446828, "question_id": "mhEjPYGL7RBFbtuTtQhisj", "question": "What instrument is he emulating with his racquet?", "choices": ["flute", "cello", "guitar", "violin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000446828.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440775, "question_id": "mi2hdVawxsMrobGf8ed9Be", "question": "What is the square screen near the white platform sign alerting people to?", "choices": ["road names", "birthdays", "weather forecasts", "departure times"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440775.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 36283, "question_id": "miSTxid3MBVabUhqHsxfy8", "question": "What is the object the bird is standing on called?", "choices": ["alert sign", "stop sign", "street sign", "warning sign"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000036283.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7751, "question_id": "mikgfyF24rh2NK8Y6x8Gvq", "question": "What are these animals trying to do?", "choices": ["attack", "drink", "hide", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007751.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 100474, "question_id": "minFa9gVF63G6JYn8gzHrg", "question": "What is the name given to this animal?", "choices": ["chimpanzee", "gorilla", "sun bear", "polar bear"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000100474.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 381814, "question_id": "miwFZVUxaXkud4faDBqkdd", "question": "This type of bird is in what genus?", "choices": ["falco", "nyctibius", "canaria", "mullerornis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000381814.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121741, "question_id": "mj3n9urBMQLFfTiakJXwUR", "question": "For what reason was the boy placed in the suitcase most likely?", "choices": ["play", "photo", "experiment", "accident"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000121741.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 425949, "question_id": "mkLhrnACsvjRaug8gV2boM", "question": "What food left the smallest bits on the plate?", "choices": ["meat", "cheese", "bun", "lettuce"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000425949.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207579, "question_id": "mm78oC4CDXg447ZtD5siqY", "question": "What month is he dressed for in the northern hemisphere?", "choices": ["january", "august", "june", "july"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207579.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 218626, "question_id": "mo6QfgpXSfk4Qb7m6BmW7r", "question": "What activity is the bird likely to do soon?", "choices": ["urinate", "breed", "defecate", "eat"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000218626.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 550929, "question_id": "mp2DGwtPBWxzcq4dfpEXPb", "question": "In what year was this clock tower installed?", "choices": ["1998", "1954", "2003", "1979"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000550929.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 83314, "question_id": "mpmC73tWXaENo5TnieEdZo", "question": "What is the cat doing with the water?", "choices": ["selling it", "drinking it", "stealing it", "cleaning it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000083314.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563409, "question_id": "mqNc7PwBegoGq2hRcgW3pT", "question": "What is the animal trying to do near the fence?", "choices": ["lick", "bathe", "run", "mate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000563409.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 26808, "question_id": "msBRPJxwAujGYPqb7gXmvw", "question": "How do these people know each other?", "choices": ["coworkers", "rivals", "family", "teammates"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000026808.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 35261, "question_id": "muovjr7z6hYWQYSErC2cqi", "question": "Which animal is in more danger here?", "choices": ["rhino", "giraffe", "duck", "human"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000035261.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 233333, "question_id": "mvpwGrKdkrhDQQAF3mGxni", "question": "What kind of bread is this?", "choices": ["pita", "bagel", "ciabatta", "muffin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000233333.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324363, "question_id": "mwvJAiszvhAucW4FjujB5y", "question": "Based on the size of the slices who would this pizza be prepared for?", "choices": ["teenagers", "kids", "animals", "adults"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000324363.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409943, "question_id": "mxH79Lv9idNXDyXyDkWYFj", "question": "What is the profession of the operator of this vehicle?", "choices": ["pilot", "steward", "driver", "captain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376664, "question_id": "myETbLYYLGnMsvGKpFKoXH", "question": "What is the dog doing?", "choices": ["eating", "sniffing", "running", "sleeping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376664.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224356, "question_id": "myNmbTFbYhYgdNhkmuJMqm", "question": "What is the time is displayed on the clock in the above picture?", "choices": ["310 pm", "115 pm", "115 am", "310 am"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224356.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102792, "question_id": "n3F6Dirrk5EcR8Le9mLBxo", "question": "What is the green sticker used for?", "choices": ["decoration", "price tag", "anti theft", "closing bag"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000102792.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411286, "question_id": "n3HKE6gUKWBEFj55APvd7r", "question": "What does this definitely NOT run on?", "choices": ["coal", "petrol", "electricity", "diesel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189928, "question_id": "n3q3955NKLNyGP5dRjkK3X", "question": "Watch type of vehicles are parked outside?", "choices": ["ambulance", "school bus", "firetruck", "cab"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189928.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 130855, "question_id": "n4Jn87FwtgeVRYJLW3j5SZ", "question": "What type of transportation is shown?", "choices": ["road", "air", "water", "rail"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000130855.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104759, "question_id": "n55nTGJMULkUoutLHR57ni", "question": "This type of event is referred to as what?", "choices": ["ice breaker", "business casual", "funeral", "red carpet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104759.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577094, "question_id": "n7BGGVR8SsWfb2KHiAdsSK", "question": "What kind of work is required to make this area look as good as it is?", "choices": ["landscaping", "plumbing", "construction", "landfilling"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577094.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 374480, "question_id": "n7d9cL9raQK8WYJKpisy2z", "question": "What celebrity graduated from the college that shares the same name on the top left street sign?", "choices": ["hugh grant", "miley cyrus", "chris adams", "mickey rourke"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000374480.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 445181, "question_id": "n7jG6cx6c75Me2yBgmPFXu", "question": "Why are they posing with the cow?", "choices": ["found it", "like cattle", "showing pride", "for sale"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000445181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6736, "question_id": "n8hQJBCUViMBCtqSxfoXQF", "question": "What type of enclosure is used?", "choices": ["cage", "barn", "gate", "fence"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000006736.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 441497, "question_id": "nA7SUx6Gnccmwx7bKcAJLn", "question": "What type of shot is the woman about to hit?", "choices": ["serve", "backhand", "forehand", "slice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000441497.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 449961, "question_id": "nAhAKy652C3eu8jpVHxm8y", "question": "Why are there so many pillows?", "choices": ["hiding them", "storage", "large bed", "for sale"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000449961.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 419065, "question_id": "nCR8w7MrTALv5XAb34oN8s", "question": "The sheep on the field are grazing during which season?", "choices": ["summer", "winter", "spring", "fall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000419065.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254391, "question_id": "nDkff8AQVzikXKeoxQZxgc", "question": "What wooden item is below this train?", "choices": ["toothpick", "lincoln log", "railroad tie", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000254391.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 330843, "question_id": "nDkzYDh3WAYtYKNkccKpxX", "question": "This animal's fur would give you what clothing?", "choices": ["leather jacket", "cotton t-shirt", "wool sweater", "denim jeans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000330843.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 181590, "question_id": "nDvrFEHK66aSAYcQnXfrcn", "question": "What is the pointed red object on the bird's face called?", "choices": ["beak", "horn", "claw", "talon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000181590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 29159, "question_id": "nEBMxgDyVF6hWLRaKYB3Fn", "question": "Where is this bathroom most likely located?", "choices": ["restaurant", "church", "public area", "house"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000029159.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351060, "question_id": "nEYRMGUbd86rLRwmWxW88f", "question": "What kind of lampshade is on the lamp in the corner?", "choices": ["fabric", "stained glass", "plastic", "street"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000351060.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409943, "question_id": "nEwVU7XPGaqhch3zsaDToj", "question": "The name on the front of the bus is closest to the last name of what baseball player?", "choices": ["james hetfield", "otis nixon", "albert pujols", "kirby puckett"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000409943.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 378863, "question_id": "nF3PnZYEHgaU6XK5REjVwD", "question": "What is the white line in the water behind a boat called?", "choices": ["stern", "ripple", "passage", "wake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000378863.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540521, "question_id": "nFJFPi8KZKmh4SPDyHFuas", "question": "The cat's upturned paw is closest to what key on the keyboard?", "choices": ["eight", "space", "enter", "three"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000540521.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485601, "question_id": "nHGXJwdGfQMWgAuW4ZSkWv", "question": "What is the green veggie in the spoon?", "choices": ["green pepper", "asparagus", "broccoli", "zucchini"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490573, "question_id": "nHtqg6HqGYqdFnBFCwDD9X", "question": "The motion of the skateboarder's legs suggests he is going what?", "choices": ["fast", "staying still", "very slow", "slow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 422175, "question_id": "nKihr3wpbzW8ZiEfC6e3Jy", "question": "What should the sign normally say?", "choices": ["one way", "one gay", "one straight", "one day"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000422175.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415729, "question_id": "nNcWw3qXFS95Kj9XHvPPRW", "question": "What does the 26 on the snowboarder's chest signify?", "choices": ["weight", "competition number", "age", "time"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415729.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 313775, "question_id": "nSLrmMdquurKCXajo3gLFK", "question": "What features are inside this man's mouth?", "choices": ["tobacco", "bristles", "mouthwash", "pipe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000313775.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 118704, "question_id": "nSg2soUGUTyKbmNhnGbpVg", "question": "Which animal has the brown spots?", "choices": ["dog", "zebra", "giraffe", "cheetah"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000118704.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 141214, "question_id": "nT4vTiEunBPkRtTUrtFDDi", "question": "What kind of location is shown?", "choices": ["residential", "commercial", "desert", "coastal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000141214.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475573, "question_id": "nTMXvFknZbFnA3f6rgLGpQ", "question": "What type of people typically use these enclosures?", "choices": ["pastors", "police officers", "teachers", "bus passengers"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000475573.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 166772, "question_id": "nUthHpBNrYs25kcCcA9YvH", "question": "What must the plane shown here do before beginning takeoff?", "choices": ["pray", "reverse", "nothing", "taxi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000166772.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243023, "question_id": "nVPh8WbSfFDSPoDiJ5VgBL", "question": "In what country is this airline headquartered in?", "choices": ["india", "japan", "korea", "china"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243023.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90580, "question_id": "nW7xnsQ88t98Wk8JBU7Ti7", "question": "The book features what aspect about living in a city?", "choices": ["dining", "exploring", "working", "nightlife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000090580.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490946, "question_id": "nWhSbMwVD5zi96KRZ4RVWi", "question": "What created the smoke by the man's mouth?", "choices": ["pipe", "cigar", "cigarette", "vape"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490946.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387637, "question_id": "nYztXGLCQKCaNuZoU5qYje", "question": "What animal is used as a vehicle on this road?", "choices": ["sheep", "cow", "horse", "elephant"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000387637.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440676, "question_id": "nZHALZnVL86xKG8g6bvBzs", "question": "What word can be related to the name of this street?", "choices": ["royalty", "presidency", "checkers", "masculinity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000440676.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 200393, "question_id": "nbk2WR4B4NQW6qsvKm3wYC", "question": "What is he about to do?", "choices": ["land", "jump", "roll", "flip"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000200393.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570157, "question_id": "neKGX33tRH6GFWcLDPR4U9", "question": "What is the person trying to do?", "choices": ["lift", "skate", "sit", "sleep"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000570157.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362286, "question_id": "negv2fMUSG3wK7zgycD4fW", "question": "What kind of humor is present on the sign?", "choices": ["dirty", "irony", "childish", "nothing"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000362286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 53158, "question_id": "nfPoxHnpdFVbVLnnyNt2CX", "question": "What digit is hidden by the rider's foot?", "choices": ["four", "three", "one", "nine"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000053158.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146688, "question_id": "nh2RLPqKmeCHjGRBvaeSNx", "question": "The face shown on the clock here is meant to be what celestial body?", "choices": ["moon", "sun", "mars", "venus"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146688.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 438739, "question_id": "nheRpqDYZtLBMmtKpD2pab", "question": "What is wrong with the pink pole?", "choices": ["fading paint", "unstable structure", "blocking view", "graffiti"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000438739.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376169, "question_id": "njRps4sjEEjdCDBbzMRAbE", "question": "What type of food is shown?", "choices": ["pizza", "wrap", "taco", "empanada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376169.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442583, "question_id": "nkhP9D3rDG2BfyQVBn2UHj", "question": "What do you add to prepare the biscuits?", "choices": ["butter", "water", "milk", "cheese"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000442583.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 27760, "question_id": "nkqQRm79UA5zWbQkYm4qe9", "question": "What is she doing?", "choices": ["selling skis", "hiding", "taking pictures", "freezing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000027760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576529, "question_id": "nksn3xxu2Pnmj5iQTrw9E3", "question": "In 2012 this area was used to film a movie about what princess?", "choices": ["diana", "snow white", "cinderella", "tiana"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000576529.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61810, "question_id": "nm8gcnkfdXWa9NVNGv8sjR", "question": "What type of cargo is this train carrying?", "choices": ["coal", "chemicals", "passengers", "grain"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061810.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 308013, "question_id": "nnVcisboP54FpqwcRawrFr", "question": "Knowing and understanding what type numbers helps to decipher time here?", "choices": ["syrian", "roman", "arabic", "egyptian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000308013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 230719, "question_id": "noLzCnCku7Eh7Mw7rwf8vQ", "question": "What type of transportation is shown?", "choices": ["air", "water", "road", "rail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000230719.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 49515, "question_id": "nqxWSMzqeEhWSedsTmFoyB", "question": "Where is this giraffe located?", "choices": ["zoo", "hospital", "circus", "wild"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000049515.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 337399, "question_id": "nsNLmomowy4mr9zPfjefqa", "question": "How did the car window open up for the dog?", "choices": ["broken window", "wind", "owner", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000337399.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283784, "question_id": "nuLnPrPzLUVHJjRJHtaLb6", "question": "What type beer is being consumed here?", "choices": ["special local", "bush", "guinness", "pabst"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 479870, "question_id": "nv6kZ4PRMQvpSPrE8keUkw", "question": "What type of license does the driver of this vehicle need?", "choices": ["motorcycle license", "commercial license", "scooter license", "operator license"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000479870.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 317692, "question_id": "nw4t74kQUFgQkz24N4sztU", "question": "What color would one get if the door color were combined with the color of the first border?", "choices": ["green", "orange", "purple", "grey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000317692.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92234, "question_id": "nwjk3kmh4Nz6Mb6hfUusjJ", "question": "What type of parking is available?", "choices": ["street", "lot", "valet", "diagonal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000092234.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 473303, "question_id": "nxDEUcx5D8WQWrmX9fCK2g", "question": "What is on the plate?", "choices": ["toothpick", "knife", "umbrella", "fork"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000473303.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 426871, "question_id": "o2xU8MwLR6o9x5kaLDLLNS", "question": "What is the best term to describe what the skier is doing?", "choices": ["racing", "slalom", "best trick", "downhill"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000426871.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 157842, "question_id": "o37a7uNzcrtmouZUBYzX6Z", "question": "The animal is baring what?", "choices": ["backside", "teeth", "stinger", "claws"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000157842.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 190329, "question_id": "o5M5b6oUaoBvcfbq5GWVWH", "question": "What color is the animal on the pillow?", "choices": ["green", "orange", "purple", "blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000190329.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 319246, "question_id": "o5ZDbBhvMJGpaLbsMAVbwf", "question": "What is on the grass?", "choices": ["boxes", "animals", "benches", "humans"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000319246.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 256120, "question_id": "o5kAvnZddpp4T2mUHz52Au", "question": "What might these girls be hoping for here?", "choices": ["candy", "hot dogs", "fertilizer", "animals"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000256120.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456549, "question_id": "o5oKkqmXyYjPErqm7osRqr", "question": "What is the role of the person shown?", "choices": ["passenger", "patient", "coach", "player"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456549.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 546947, "question_id": "o5pz38BHuei2BSv95qxwGF", "question": "What is the paper in this room used for?", "choices": ["writing", "cleaning", "wrapping", "reading"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000546947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 181494, "question_id": "o6cLpSzsDMY9UgeovdbPAU", "question": "What type of beverage is in the silver boxed container to the right of the sandwiches?", "choices": ["pepsi", "iced tea", "coca cola", "coffee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000181494.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 295557, "question_id": "o76qp3kz3yNeJc2evrMq2W", "question": "How is this powered?", "choices": ["solar power", "natural gas", "batteries", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000295557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 529334, "question_id": "o7JBizNAkogw8w6fd22HJA", "question": "Which country's flags is in front of the building?", "choices": ["united states", "italy", "france", "canada"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000529334.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 364945, "question_id": "o96m6adACbeYqmE6zpSvrB", "question": "People in the distance standing on the pier might be doing what?", "choices": ["grilling", "fishing", "picketing", "escaping"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000364945.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194831, "question_id": "o9GCycs2pqg3bvruszgjYy", "question": "What type of pathway is shown?", "choices": ["road", "trail", "aisle", "sidewalk"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000194831.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146037, "question_id": "o9KQaJYofxzqHFLySBiTnQ", "question": "From the state of the trees and plants in front of the clock tower what time of the year is it?", "choices": ["winter", "summer", "spring", "fall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000146037.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126882, "question_id": "o9i7pA7duqFkvDrbZVcCvn", "question": "What did the owner hope to accomplish by putting them in the oven?", "choices": ["bake", "shatter", "toughen", "sterilize"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000126882.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532138, "question_id": "oAYbPpouHyMeMkaXSah2kx", "question": "What type of material is the plate the man is holding?", "choices": ["paper", "ceramic", "plastic", "metal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000532138.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 392361, "question_id": "oCKV6G4sVtVUpANd3uJMEU", "question": "What kind of pole is shown?", "choices": ["stripper", "flag", "ski", "barber"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000392361.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 189202, "question_id": "oCPCfZqZAudACfCyDsANwo", "question": "What are people wearing black taking here?", "choices": ["tumbles", "air", "gold", "pictures"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000189202.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 20806, "question_id": "oDXnXsA7fAeXxqLMKorn6d", "question": "Why are his legs twisted behind him?", "choices": ["executing trick", "falling", "skis broken", "out-of-control"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000020806.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283132, "question_id": "oDuCGvroCUpfaEuLEjR9A4", "question": "What is coming out of the animal's mouth?", "choices": ["tongue", "meat", "foam", "bird"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000283132.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 169464, "question_id": "oFBowpNoAhYdfEZ7P9o2PA", "question": "What activity does this horse get a lot of judging by it's muscular body?", "choices": ["sitting", "standing", "running", "laying down"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000169464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552628, "question_id": "oFrDSiUSdDSmfnWyii5ZRZ", "question": "What type of activity can be done here?", "choices": ["washing", "resting", "cooking", "painting"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552628.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 355318, "question_id": "oFyficFBP9YQNz2YwAvLxe", "question": "What can be said about the snow here?", "choices": ["fine", "melting", "icy", "deep"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000355318.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168441, "question_id": "oHGUAhds9LApaoijV49iWv", "question": "What protein is is found and makes up this cow's horn?", "choices": ["hemoglobin", "insulin", "p53", "keratin"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000168441.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 329994, "question_id": "oHqgNvxnpQXQ8D6Z6JsKZV", "question": "Why is the man crouching?", "choices": ["to jump", "to hide", "to attack", "to balance"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000329994.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 99257, "question_id": "oJhJCxo73hZhPZJfqDcDis", "question": "Where is this toilet placed?", "choices": ["bike", "bus", "train", "auto"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000099257.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 379141, "question_id": "oKLqtJaWC48wGgM7J7j7P5", "question": "What is the flavor of this ice cream?", "choices": ["scotch", "chocolate", "brownie", "vanila"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000379141.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 126219, "question_id": "oLjMgZP3eFxidittu9RwYg", "question": "What is the thin black rod on the front window used to do?", "choices": ["hold tickets", "power train", "hold letters", "clean windshield"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000126219.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 349315, "question_id": "oN95aufbZUogj2TSYUgHUn", "question": "What part of the house is this a part of?", "choices": ["toilet", "bedroom", "kitchen", "living room"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000349315.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268118, "question_id": "oPhHkXfthPZWsyxwyTTkro", "question": "The frisbee shown here is imprinted with what creatures footprint?", "choices": ["horse", "dog", "snake", "giraffe"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000268118.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 149735, "question_id": "oRHGRDfpuHeTQvjMg6c92u", "question": "What would a person get if their car was by the sign for an extended period?", "choices": ["reward", "nothing", "fine", "food delivery"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000149735.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569899, "question_id": "oRj6fKpmDdZ9vWwczcsrKz", "question": "What are these animals usually given in captivity?", "choices": ["leash", "clothes", "nothing", "horseshoes"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000569899.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411388, "question_id": "oS4BBqMwSz4xuAQ9mzUP9w", "question": "What type of roof does this building have?", "choices": ["tile", "mud", "wood", "thatch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000411388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336638, "question_id": "oVaLiuwnMzKvojV3xmX7K5", "question": "What action is most likely taking place beyond the stop sign?", "choices": ["boat race", "music concert", "dance", "road construction"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000336638.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 168829, "question_id": "oWXmTS5XN6NmDcNY2aVaQV", "question": "What can be said about the zebra's head?", "choices": ["warm", "non-visible", "cold", "congested"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000168829.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 224611, "question_id": "oWoitSauwK8bku7KnHkoZx", "question": "What body part is missing on the elephant?", "choices": ["ears", "snout", "tusks", "tail"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000224611.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 86500, "question_id": "oYK2mwvbMWdhKVi6SpYssF", "question": "What uncommon topping is on this pizza?", "choices": ["chocolate", "shrimp", "corn", "tuna"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000086500.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 536208, "question_id": "oYRuBn7d4y8SVYgxN6VvJK", "question": "What is she wearing while doing this sport?", "choices": ["nothing", "bikini", "swim trunks", "wedding dress"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000536208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 369368, "question_id": "oYTGDYD3Sa6KQToqCSJ2HM", "question": "Ashok Leyland is belongs to which country?", "choices": ["nepal", "russia", "india", "china"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000369368.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91087, "question_id": "2DjFMdD7awD6DckSTHo6v4", "question": "What type of hat is the man wearing?", "choices": ["baseball cap", "bucket hat", "fedora", "beanie"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283519, "question_id": "2E9wod4ejLmshZNK8fZJLX", "question": "What are the two large red boxes on the sidewalk next to the pole used for?", "choices": ["recycling", "mail delivery", "trash disposal", "newspaper distribution"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000283519.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 24837, "question_id": "2RkViuuNnGyUMJHiejW3eJ", "question": "What is the name of the breed on the right?", "choices": ["siamese", "persian", "rag doll", "tabby"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000024837.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 400209, "question_id": "38NB6VxJ5cVriKNyxxr3r3", "question": "What is unusual about the bat?", "choices": ["backwards", "too flexible", "too small", "too big"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000400209.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 339537, "question_id": "3uu4h8NhMu87VkAL2sc9eF", "question": "What part of these flowers are regular eaten?", "choices": ["bulbs", "leaves", "petals", "seeds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000339537.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 277875, "question_id": "3zdbaXyrmPpb8gKrEFzWrU", "question": "One can add what words to one of the signs to get the name of a twin island nation?", "choices": ["and tobago", "and grenadines", "and caicos", "and nevis"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000277875.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117056, "question_id": "46JdexNuYLRz8qf883Yf5A", "question": "What percentage share in this airline does its government hold?", "choices": ["326", "758", "196", "558"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000117056.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 273026, "question_id": "4Sm6F7hq7msaCzJkDxc2Tg", "question": "What type of intersection is the stop sign in the middle of?", "choices": ["dead end", "oneway", "threeway", "fourway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000273026.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 260675, "question_id": "4aUDiUdnGMPccowTLx3vCj", "question": "The scene is ready for what to be done?", "choices": ["watching television", "cooking", "photography", "showering"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000260675.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 107608, "question_id": "4rtLm9eMEvCXrNJ4GmZtMH", "question": "One would have to eat these sparingly if they have what disease?", "choices": ["autism", "lung cancer", "diabetes", "alopecia"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000107608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159212, "question_id": "5R7o8zWPGYdZQFkpnYfEzx", "question": "What is the catcher looking at?", "choices": ["batter", "pitcher", "crowd", "baseball"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159212.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 367852, "question_id": "5ZCwDnShhvZqGCXsmN85Zm", "question": "What is the direction of the car stops?", "choices": ["south", "east", "north", "west"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000367852.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 1362, "question_id": "5b7JN9HqYCdvJKwQyyoQNy", "question": "What is likely the first digit of the phone number listed?", "choices": ["three", "one", "four", "two"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000001362.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 334304, "question_id": "64vJHcq9WBEqGd2ervZ7xD", "question": "What mail service began in a year that coincides with this address?", "choices": ["ups", "usps", "pony express", "fedex"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000334304.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 342308, "question_id": "67zT59r6JjoM8Drpzmp243", "question": "What professionals prevent them from crashing into each other?", "choices": ["crossing guards", "airtraffic control", "security guards", "policemen"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000342308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491232, "question_id": "69YncFiDiCFVoeJd6qhmKv", "question": "What do the chemicals do?", "choices": ["make shiny", "change colors", "remove water", "decontaminate waste"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000491232.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 377292, "question_id": "6Mi3n452jkMhK56KyqJd9i", "question": "What is the item that the white things are in?", "choices": ["barrel", "blender", "tray", "box"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000377292.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 563533, "question_id": "6TEwxXNonNqeNWwwYBxK4E", "question": "Based on the wave size what level is the surer most likely?", "choices": ["beginner", "pro", "semi pro", "amateur"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000563533.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 181765, "question_id": "6dbUYa58B4zTG4tWrmaUpN", "question": "What is the slogan of her shoe company?", "choices": ["just do it", "play harder", "no fear", "life is short"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000181765.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 409253, "question_id": "6xCasUS4CuAF6Rjs3iKPmF", "question": "How did the saint whose name appears on the sign die?", "choices": ["crucifixion", "guillotine", "firing squad", "drowning"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000409253.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146265, "question_id": "76hWiKMbPJJLJurWUPPMKZ", "question": "What would the time be based on the objects present?", "choices": ["745", "1119", "919", "645"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000146265.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135574, "question_id": "7VoXZEYnc8iAPdBxJDk4Zo", "question": "Which object contains the most information about voting?", "choices": ["urinetown", "book", "time magazine", "mouse"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135574.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 234838, "question_id": "7XTKeBSvTA8ozZVgKoLkQJ", "question": "What will one see if one shakes the two items on the front right?", "choices": ["real snow", "fireworks", "balloons", "fake snow"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000234838.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451466, "question_id": "7rnK7HXpzSa7etBoR6pQXX", "question": "What are the little sculptures made from?", "choices": ["knives", "rods", "surgical scissors", "forks"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000451466.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64447, "question_id": "8FEe9uHkD7rNKzsv8HmY3w", "question": "Which direction is the girl facing in relation to the kite?", "choices": ["away from it", "above it", "below it", "towards it"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000064447.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 470725, "question_id": "8hE645YN87tng2ctTwX25K", "question": "What word was added in after the rest?", "choices": ["station", "tut", "street", "king"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000470725.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 56683, "question_id": "9SZ9CEZiJZj5avHy7KhMgr", "question": "What part of his body will be used by the player to move the ball?", "choices": ["feet", "elbow", "head", "knee"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000056683.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 517982, "question_id": "9Zdd7d58EvE6XbeQLv4vVs", "question": "If the shiny object was this it would be worth the most money?", "choices": ["brass", "alloy", "copper", "gold"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000517982.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 158862, "question_id": "9wijHGXkFpscivH5DHX7zM", "question": "What are all three guys wearing?", "choices": ["flippers", "swimming trunks", "wetsuits", "swimming caps"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000158862.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 213208, "question_id": "9yb3qjVhxpzRk3Rg6gu8uZ", "question": "What type of music does the owner of the phone enjoy?", "choices": ["classical", "rock", "rap", "pop"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000213208.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 543540, "question_id": "A5qKH5mskg22c7wjw4WmsQ", "question": "When were these types of signs first installed in the U.S?", "choices": ["1970", "1888", "1936", "1915"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000543540.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286054, "question_id": "AGGKBvGH4fUbNntEGycxK2", "question": "What does this kid wearing a diaper tell you?", "choices": ["is in kindergarten", "is stubborn", "not pottytrained", "is girl"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000286054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 106396, "question_id": "ANHpuxjGg8uCpBm8TbRNC8", "question": "Which civilization created this type of numbers?", "choices": ["roman", "greek", "egyptian", "sumerian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000106396.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 139683, "question_id": "AnrDNscJNLP3YCmuXzMHDM", "question": "What will she do with the rope?", "choices": ["whip horse", "make barrier", "tie up horse", "put on horse"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000139683.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243880, "question_id": "AtXRduEveaRHSArWooPyhG", "question": "What is the most likely size of the laptop screen?", "choices": ["13", "15", "17", "five"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000243880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160047, "question_id": "B8b4QguTF9DfcXawoV8xc9", "question": "Which government group usually works closely with these drivers?", "choices": ["mayors office", "irs", "board of education", "homeland security"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000160047.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532767, "question_id": "B9tC72Td4KsMSFTYr776b6", "question": "This actor became famous on what television show?", "choices": ["big bang theory", "saturday night live", "everybody loves raymond", "mad about you"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000532767.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551377, "question_id": "BD9yQ4ADTRBp2KBHz5JFAd", "question": "These treats can either be fried or what else?", "choices": ["baked", "boiled", "barbecued", "grilled"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324597, "question_id": "BDsWeBUaF4VRGsJ49iLTLs", "question": "How did the fruit get on the wall here?", "choices": ["naturally fell", "cows", "birds", "person placed"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000324597.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51974, "question_id": "BYqBrBWkcJsUryNpHyCxqH", "question": "What was the last year this truck was produced?", "choices": ["1972", "1954", "1922", "1938"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000051974.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 153053, "question_id": "BbdvPt3MfmStiJG6UpxoqD", "question": "What side did this country fight on during World War II?", "choices": ["allies", "neutral", "axis", "became country after"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000153053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 387302, "question_id": "Bg2ejbeCHiJoQ267FEj839", "question": "What type of material covers the ball?", "choices": ["felt", "satin", "flax", "silk"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000387302.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 262464, "question_id": "BhnhhKYt5ZikYMQKPzDgVM", "question": "The blue item once contained what?", "choices": ["soap", "pasta", "beverage", "cheese"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000262464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 333828, "question_id": "Bzw2nkM42G5BPXdRcKRrfM", "question": "What is the purpose of the white rail near the road?", "choices": ["keep on road", "prevent animals", "corral grass", "limit speed"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000333828.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 442996, "question_id": "C25gNFVkpJebXyMoi8KcAu", "question": "What kind of waves are these?", "choices": ["boat wake waves", "tidal waves", "spilling waves", "hollow waves"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000442996.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 121362, "question_id": "C2XmmTEdDV66NASxnr9sPM", "question": "What action with the ball did the tennis player just take with her arm in the air?", "choices": ["kick", "catch", "toss", "throw"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000121362.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 146890, "question_id": "C4zoGCPRy4oZjaJQUGtFUH", "question": "Which one of these is a brand of the white item under the pizza?", "choices": ["minwax", "sanyo", "meineke", "bounty"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000146890.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 556947, "question_id": "CNPmRZEEsPmwLgCmL7yB9Z", "question": "The item around his neck signifies that he has what?", "choices": ["his shots", "purebred genes", "owner", "rabies"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000556947.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 101944, "question_id": "CNVDR5apYwNumKLzDqoWvi", "question": "The person who is seated in this area visited what business earlier today?", "choices": ["bakery", "salad bar", "gym", "bar"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000101944.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61768, "question_id": "Cq4MjjR7hxSWSNiuxYMKMG", "question": "Which one of these groups of people might be most likely to obey this advertisement?", "choices": ["snowbird", "skier", "west indian", "carpet bagger"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000061768.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291401, "question_id": "D8NvSpZHT5f6AF6ZinXm7B", "question": "What is the most likely reason the cow is between the logs in the water?", "choices": ["stuck", "thirst", "resting", "hunger"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000291401.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 52697, "question_id": "D95r5cexA7bTyXpC9Mswes", "question": "What is inside the open flap at the front of the plane?", "choices": ["fuel filler", "luggage", "pilots", "wheels"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000052697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 134403, "question_id": "DAdE8PsQr87g7UgWdK9z4a", "question": "The white items are made of what?", "choices": ["potato", "cheese", "flour", "paper"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000134403.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 90483, "question_id": "DJPbcYtrbrGQDe4Ppj3HjF", "question": "What is usually in the hole on the bathtub?", "choices": ["p trap", "water supply", "drain stopper", "overflow drain"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000090483.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 6054, "question_id": "DYao5Ph7HwvmBfHNdmLsEV", "question": "In which region of the world would you be most likely to find the fruit growing?", "choices": ["west indies", "england", "france", "north pole"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000006054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 38458, "question_id": "DdQGuWgeQfCruDdKpbcb8Z", "question": "What band sings the song that the graffiti references?", "choices": ["bon jovi", "queen", "aerosmith", "journey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000038458.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 520426, "question_id": "DyJ7M7mnEqajbH7vWwWi5i", "question": "What is the marital status of the woman on the bed?", "choices": ["single", "married", "divorced", "engaged"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000520426.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 483097, "question_id": "E3yUwCCdynHZpL8o2cgBtu", "question": "In what neighborhood was the original one of these restaurants opened?", "choices": ["coney island", "flatbush", "park slope", "red hook"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000483097.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 420759, "question_id": "E8X9RvrDmosxQAk7vuVW23", "question": "In what common starter might you find the green stuff?", "choices": ["salad", "prawn cocktail", "crab cakes", "smoked salmon roll"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000420759.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133890, "question_id": "EAPg8GVksgsMup6MemGNe3", "question": "What would usually accompany the items on his tie?", "choices": ["numbers", "food", "arrows", "globe"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000133890.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 489076, "question_id": "EAQWiRRuGuE69nXanHZPbY", "question": "The person who owns this truck probably loves which one of these books?", "choices": ["bible", "origin of species", "da vinci code", "koran"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000489076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 475249, "question_id": "ECJAFUyX3k2tNdkrwThmEi", "question": "What will be used to fix the hair of the dog after being dried?", "choices": ["hose", "table", "brush", "paws"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000475249.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 7190, "question_id": "EHV5CfZEukz6BXFD7ZM2zj", "question": "In which of these months will street cleaning be undertaken?", "choices": ["march", "january", "june", "december"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000007190.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 311308, "question_id": "EKNnCQZccQMEuKMeSbRhtM", "question": "What was the first name of the founder of this clothing company?", "choices": ["donald", "richard", "rufus", "hamilton"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000311308.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 51582, "question_id": "ENjfG2k6a5oCcyinXbndJE", "question": "What type of utensil is the man using to peel the orange?", "choices": ["butter knife", "pocket knife", "butchers knife", "steak knife"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000051582.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44966, "question_id": "ET2rvBzaaatkf6iswfhNEq", "question": "The container is made of what material?", "choices": ["cardboard", "cloth", "glass", "styrofoam"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044966.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 557414, "question_id": "EYE2foCHryyaC5uzz72mpS", "question": "Which vehicle is the heaviest?", "choices": ["red car", "black car", "white van", "white car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000557414.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 159239, "question_id": "EdmYr3mGkciiipz7HLu7XH", "question": "What would a thief fear in this neighborhood?", "choices": ["stray dogs", "neighborhood watch", "slippery sidewalks", "sink holes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000159239.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 258813, "question_id": "EfksWwCT2LCQmaR8gQuiBS", "question": "What is the large cylinder above the toilet used for?", "choices": ["pump water", "heat water", "evaporate water", "store water"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000258813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548033, "question_id": "EjxCEXenGvJnxK2sRpJEWk", "question": "What are this type of decorative boards called?", "choices": ["lack of funds", "wallpaper", "wainscoting", "halfwall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548033.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 385608, "question_id": "FAiLCWdLbcCFTaDxShzQwi", "question": "What food might it find here to eat?", "choices": ["humans", "berries", "salmon", "turkey"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000385608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 453950, "question_id": "FEzgbEeLZcEZMaZCuBHAt3", "question": "When did the event on the screen happen?", "choices": ["tomorrow", "several years ago", "month ago", "very recently"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000453950.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544756, "question_id": "FLzhfXXTArDDRTyh5KZTsp", "question": "What is the shape of toilet?", "choices": ["square", "rectangle", "circle", "oval"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544756.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 382322, "question_id": "FMFLboMEjH2bXhUisQcLdg", "question": "Which one of these is another name for the white topping shown?", "choices": ["toadstool", "frog house", "rose of sharon", "barnacle"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000382322.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 104526, "question_id": "FSbviwmscYvCgkuzXBLQdk", "question": "What is the most appropriate name for the sport the women are playing?", "choices": ["frisbee soccer", "frisbee tennis", "frisbee football", "frisbee golf"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000104526.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 469309, "question_id": "FU6dgGwxmckzUSXNTmcjFx", "question": "Which one of these people can leave earliest?", "choices": ["brown pants", "blue jacket", "wearing cap", "green jacket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000469309.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268160, "question_id": "FWqNcJvRBhFAkX5xjTgT6j", "question": "The clear wrap is intended to say what about this parking meter?", "choices": ["not working", "needs coins", "brand new", "wrong color"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000268160.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 380476, "question_id": "Ff2VxZ3VxoRN8woetZqSmC", "question": "What part of the half pipe allows the snowboarder to land perfectly?", "choices": ["height", "soft snow", "slope", "ledge"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000380476.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57327, "question_id": "FnpPDaGyRHkHkGsDDdQ6bi", "question": "What is the purpose of the equipment underneath the plane's main body?", "choices": ["passenger seats", "putting out fires", "water landing", "tree cutting"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000057327.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 548449, "question_id": "FpUKU7H93crNwZxPUTJru3", "question": "Based on their decor they are fans of what city?", "choices": ["montego bay", "des moines", "amsterdam", "paris"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000548449.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 243880, "question_id": "FwnZKtV9GQ5g9hJjNPWyLH", "question": "What would be an ideal activity to use this older style laptop for?", "choices": ["multimedia", "web browsing", "gaming", "photo editing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000243880.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 122673, "question_id": "G2MshDS6ad5xhx5ZYhWFwQ", "question": "What body part is depicted in black?", "choices": ["hand", "bottom", "foot", "leg"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000122673.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 566977, "question_id": "G7fMKG8TyyuPqWreZYSv4Q", "question": "The skating surface is likely made of what material?", "choices": ["concrete", "marble", "granite", "mud"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000566977.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 25067, "question_id": "GDFfxN5btinQCF8CtT3Gp9", "question": "How was this cake cooked?", "choices": ["grilled", "roasted", "broiled", "baked"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000025067.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 74243, "question_id": "GNvSEDnPhTmfUBK8Vttu7F", "question": "This is what type of environment?", "choices": ["tropical", "arctic", "desert", "temperate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000074243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177378, "question_id": "GPeFo4qqLanCBwZjRnDHZ4", "question": "What is the most unusual characteristic of these benches?", "choices": ["color", "material", "style", "size"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000177378.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 286868, "question_id": "Gw4rFjqvkUuYnuZ9kjYx8o", "question": "These toilets are ready for whom to use them?", "choices": ["men", "women", "children", "no one"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000286868.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 69524, "question_id": "H47E2NRiktXjRfMzpYkuxd", "question": "The elevation of the clouds indicate the man is what on the mountain?", "choices": ["at base", "down low", "at top", "high up"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000069524.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 468813, "question_id": "H56mwbmNqpwF3G5D5V2oXn", "question": "What type of food does this bird eat by looking at the beak?", "choices": ["grain", "fruit", "nectar", "fish"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000468813.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 187161, "question_id": "HF5focvnihca6unmTPkfEE", "question": "What is reflected in the mirror?", "choices": ["two shelves", "two people", "two sinks", "two vases"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000187161.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 18906, "question_id": "HSUuzXNhv5gCbD874QkbN7", "question": "This beverage brand hails from which country?", "choices": ["britain", "ecuador", "australia", "denmark"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000018906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360363, "question_id": "HWcbKGgXvwDyuKh4fomMBm", "question": "The hitter is standing in the very back of the what?", "choices": ["batter box", "pitchers mound", "diamond", "warning track"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000360363.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201802, "question_id": "HjzntPw7zL4EfUGRRrDTP8", "question": "Where did Europeans first see these types of flowers?", "choices": ["tanzania", "mexico", "netherlands", "persia"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000201802.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 552608, "question_id": "HkyDsCbxyZ9nEwhYCp2kAH", "question": "What was the original name of this beverage company?", "choices": ["brads coca", "brads drink", "cocacola", "brads cola"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000552608.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 227211, "question_id": "HyHMd9YULHE6bY7zshomyL", "question": "Why is she floating over the bed?", "choices": ["mystic", "uncomfortable", "illusion", "dead"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000227211.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451939, "question_id": "HybeaQzC69E8HmxN97zudL", "question": "What is the kind of feeding relationship between the giraffe and the birds above?", "choices": ["parasitic", "predation", "mutualism", "competition"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000451939.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 328345, "question_id": "J36Kp7VhHoiYsjttWRPcyj", "question": "Why is the player in the blue helmet running?", "choices": ["dodging ball", "catch ball", "back to bench", "to next base"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000328345.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 72087, "question_id": "J4THJKokLT9gCAU6VRng74", "question": "What is the English name of this explorer?", "choices": ["ferdinand magellan", "christopher columbus", "vasco da gama", "james cook"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000072087.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478397, "question_id": "J6AqnhFJAqhkeX9qDUqG9X", "question": "What activity is the person above doing?", "choices": ["surfing", "snow boarding", "sliding", "walking"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000478397.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 225815, "question_id": "J9DcZNaaD3nTns9YwYB5U9", "question": "What would be a useful addition to this scene?", "choices": ["beach chair", "desk", "drums", "pew"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000225815.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 332449, "question_id": "JWChNiA6BvYcjXRrXVh2xr", "question": "What items can be cleaned in this building?", "choices": ["shoes", "dishes", "clothes", "car"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000332449.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290900, "question_id": "Jh4hnyvGRAp6iwo5e5qmD6", "question": "What is the general air temperature outside the train?", "choices": ["cold", "hot", "freezing", "warm"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000290900.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401334, "question_id": "JkyT4bJ6kRkXmt6KsgaXyQ", "question": "What fish does this creature allegedly love?", "choices": ["chub", "tuna", "salmon", "perch"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401334.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569287, "question_id": "Jm33rhmQCmiYf7u3Tgu5Ty", "question": "What type of drink is next to the bunch of bananas?", "choices": ["tea", "kombucha", "smoothie", "fruit juice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000569287.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 324144, "question_id": "Jn84KKCnuBsKQgKyb9arDC", "question": "The mousepad resembles what household item?", "choices": ["curtain", "table mat", "rug", "blanket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000324144.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 376482, "question_id": "JnLbWczLBUuM6LCTXDpsuC", "question": "How much time is left on the meter?", "choices": ["4 hours", "0 hours", "2 hours", "8 hours"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000376482.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 461297, "question_id": "JwcxiuyMsFh42RWgzyLrah", "question": "What style of decor does the owner of this home probably like?", "choices": ["transitional", "shabby chic", "tropical", "modern"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000461297.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 183800, "question_id": "K3iLot6JpN5QsiMXNaATBA", "question": "What type of building is most likely nearby?", "choices": ["police station", "daycare center", "airport", "prison"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000183800.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 523588, "question_id": "KEteNuUWRXANqGkTbjFLtz", "question": "When did this railway go defunct?", "choices": ["1998", "2001", "2020", "2012"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000523588.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 491392, "question_id": "KJZDG7jMU3BPX25xRCfUZu", "question": "What is atypical about this bathroom?", "choices": ["uncleanliness", "excessive size", "lack of fixtures", "color scheme"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000491392.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 8104, "question_id": "KL6KQrLcCsovs2EAk2jA6G", "question": "What is the purpose of the round item hanging on the wall?", "choices": ["hold toilet paper", "hold shower curtain", "hold plunger", "hold towel"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000008104.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 511010, "question_id": "KRSUkUA3TabGFEc2G7VHKo", "question": "Which ocean is the surfer most likely surfing in?", "choices": ["pacific", "arctic", "indian", "atlantic"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000511010.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 45590, "question_id": "KxXd4FEmgr5JWPrekBdJKx", "question": "What type of television is behind the cat?", "choices": ["crt", "lcd", "plasma", "oled"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000045590.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 968, "question_id": "L6bCLPktE9ygF4AqqdKRV9", "question": "Why do these beings like laptops?", "choices": ["resale value", "do office work", "play video games", "warmth"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000000968.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 431320, "question_id": "LC2hv29pv4U6uzSbGcbwvb", "question": "What type of hammer is smashing the phone?", "choices": ["engineer", "ballpeen", "claw", "sledge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000431320.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 57043, "question_id": "LMS7tiXFvyqB4ncj2iygH8", "question": "The newborn baby is located in what area?", "choices": ["doctors office", "walkin clinic", "hospital icu", "home"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000057043.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 524240, "question_id": "LQ2sgrP3XBFrdjysU8NaJF", "question": "What might have been used to most seriously deface this sign?", "choices": ["q tip", "spray paint", "hole punch", "crayon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000524240.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 242778, "question_id": "Lbom4a6rdScbvimJDPfZyU", "question": "What would the sign above the bus say in the United States?", "choices": ["out to lunch", "coming soon", "no vacancies", "for rent"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000242778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41206, "question_id": "LsSM4tyuDkU8qGsGPpHbfY", "question": "What is preventing the dog from getting to far from the owner?", "choices": ["water", "wind", "board", "leash"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000041206.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 41000, "question_id": "LtShr9n7QxLyWj5knZeHY7", "question": "How is this type of aircraft called?", "choices": ["propeller plane", "heliplane", "fan plane", "spin wing plane"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000041000.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 570053, "question_id": "M3RZ2wK5qsbWVgMUzzQuHz", "question": "What is one purpose of the brown stuff on the ground?", "choices": ["perfume air", "prevent mosquitos", "retain moisture", "soft walking path"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000570053.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 241869, "question_id": "MCprsEksPvbmjW2qRd9oLh", "question": "Why is the frisbee soft?", "choices": ["protect teeth", "easier storage", "easier to make", "easier flight"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000241869.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 103868, "question_id": "MEJAPBSxwR8GZ4roWPcdfs", "question": "This is helpful in case of what occurrence?", "choices": ["tornado", "flood", "fire", "earthquake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000103868.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 64131, "question_id": "MPwcv5q2vxLejwQC862Sx9", "question": "What letter is traditionally associated with the bend in this object?", "choices": ["u", "r", "g", "e"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000064131.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 290877, "question_id": "MbD6Z7FC7jCj98YqhqXEKM", "question": "In what year did this airline celebrate its 80th anniversary?", "choices": ["1995", "2008", "2020", "2017"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000290877.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 439844, "question_id": "Mc4waNkn8VetRdMNvXyasj", "question": "What is the size of the person in relation to the teddy bear?", "choices": ["smaller", "same size", "cant tell", "larger"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000439844.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 196181, "question_id": "MoepnVeUe9LppyyuSc7dPJ", "question": "Which political party is winning according to the graphic on the computer screen?", "choices": ["republicans", "democrats", "independents", "libertarians"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000196181.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 318684, "question_id": "N2xZgJyU4WFsr7fGCzViuw", "question": "Which one will get wet if it starts to rain?", "choices": ["left", "right", "middle", "none"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000318684.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152200, "question_id": "NKcgsXJFGvDRTWig22Ct3v", "question": "When did this air force become independent from the British Air Force?", "choices": ["1923", "1952", "1938", "1940"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000152200.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 97536, "question_id": "NQsr9iEm8XfTHjz7XTHUhG", "question": "What does the ESP refer to?", "choices": ["extrasensory perception", "espionage", "espana", "especially"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000097536.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 202758, "question_id": "NUVQRgyGJbbi5uMmtxrmsr", "question": "Which one of these is a likely ingredient of the white food?", "choices": ["almonds", "raisins", "yeast", "lemon juice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000202758.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 102286, "question_id": "NVMFch5n3bfaxvmhnqTakz", "question": "What are the bubbles that have formed on the stems of the plants within the vase?", "choices": ["oxygen", "hydrogen", "methane", "carbon dioxide"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000102286.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 411628, "question_id": "Nd3CiyhUSfrbeVL8p7aDj7", "question": "Where did the dough for the smaller donuts come from?", "choices": ["donut hole", "separate batch", "sliced bigger ones", "different package"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000411628.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 254463, "question_id": "NdpMNQN6bCGoRgcJ2wdwFU", "question": "What type of cat is this?", "choices": ["tabby", "ocelot", "siamese", "calico"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000254463.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207944, "question_id": "NgLQUMgt5PMsPEM3cB6fgC", "question": "What is the skier trying to do by turning her skies inward?", "choices": ["slow down", "turn around", "speed up", "turn left"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000207944.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 462637, "question_id": "NxjTTHE2HNrn8v5oGT6gJW", "question": "What can be said about the box around the giraffe?", "choices": ["none of these", "its border", "cardboard box", "natural"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000462637.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 123280, "question_id": "PACaRPz3U5cF3VRYivSUuH", "question": "What type of people assist passengers who are traveling on via this mode?", "choices": ["flight attendants", "cabbies", "conductors", "sailors"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000123280.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 147845, "question_id": "PcSazBsEwGGFzxWr3Wiyg7", "question": "What state are they currently traveling through?", "choices": ["new south wales", "tasmania", "victoria", "queensland"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000147845.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 515037, "question_id": "PmUW39AyndzQmyRAZsSx6C", "question": "How can the baby's pants be described?", "choices": ["moncolored", "rainbow colored", "undersized", "too long"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000515037.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549751, "question_id": "PnikdavfqXYzT99FwoHio2", "question": "What type of interior lighting is used on the train?", "choices": ["incandescent", "hps", "fluorescent", "led"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000549751.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 223750, "question_id": "Ps7K5s6YakiDZA222eVSd4", "question": "At what pace does the women appear to be walking at?", "choices": ["medium", "fast", "slow", "very slow"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000223750.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 299877, "question_id": "PtiTZ5Ep8GrJU2dsVzUEMR", "question": "Where is this airline's largest hub located?", "choices": ["chicago", "pittsburgh", "salt lake city", "helena"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000299877.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 22033, "question_id": "QF4JZ8K566GbGnqdVdohQm", "question": "Why does the cat have glowing eyes?", "choices": ["its sick", "camera", "its posessed", "halloween"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000022033.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 506031, "question_id": "QpYAeRjdQFGLNDdhnN4JNA", "question": "What is unusual about this sign?", "choices": ["language", "apology", "harshness", "hand written"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000506031.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 235985, "question_id": "QqCFEYAcyGozmbtueAzfvG", "question": "What would start to hurt if one was watching the television while sitting on the chair?", "choices": ["neck", "ears", "hands", "feet"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000235985.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 456173, "question_id": "QrmRW9MAecGXMQByMtrgdU", "question": "What is the yellow flower with the orange center called?", "choices": ["daisy", "sunflower", "tulip", "anemone"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000456173.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 164136, "question_id": "QtZYut7duvzmasVuqZ6yPN", "question": "Which one of these music genres is commonly associated with that hairstyle?", "choices": ["jazz", "reggae", "rap", "rock"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000164136.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 46565, "question_id": "R6rRcyurX8rmFTSM6hTSPS", "question": "Why is the sky hazy?", "choices": ["dark", "pollution", "waves", "fog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000046565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 61811, "question_id": "R9BS3MWCtdMRdbkVBj2LGC", "question": "What type of shirt is the boy wearing?", "choices": ["tuxedo", "tank top", "dress shirt", "hawaiian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000061811.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 291338, "question_id": "RKEiL8zppoXm2YVrv86rLP", "question": "What name is this building known as?", "choices": ["small jim", "tower of clock", "big sam", "big ben"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000291338.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 484464, "question_id": "RXquEb9yXrnJ4xvF2t7FHx", "question": "Which suitcase could be removed without disturbing the tower?", "choices": ["top", "grey", "tan", "bottom blue"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000484464.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 229091, "question_id": "ReXffAiqXcVHMoUTCDY8vj", "question": "What is the blue latticework in front of the window and the top of the door constructed from?", "choices": ["aluminum", "iron", "copper", "steel"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000229091.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 576058, "question_id": "SC3Xr7QFMhcTWB7uBiqeet", "question": "When did this city begin to add these types of meters?", "choices": ["2004", "2015", "1999", "2010"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000576058.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 371184, "question_id": "SGGH5xGiTHeemTa53FRzxw", "question": "What kind of sauce is on this pizza?", "choices": ["ranch", "buffalo", "tomato", "bbq"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000371184.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 544054, "question_id": "SKEkyS5WcgSLmSeFmnD9h7", "question": "This type of print is known as what?", "choices": ["stripe", "plaid", "floral", "polka dot"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000544054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 276140, "question_id": "SS9NP86UCtJ5gh9BQM7f8w", "question": "What is the name of the object depicted on the to of the statues head?", "choices": ["horns", "antlers", "crown", "spikes"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000276140.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 424784, "question_id": "SXMDacZYNgfJnzeB5GJXrR", "question": "What does one have to use to grab one of these toys?", "choices": ["sack", "lasso", "broom", "claw"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000424784.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 336681, "question_id": "SryBSeW58QfZBBLsy3J4rG", "question": "What type of material is the plate the bagel is placed upon?", "choices": ["metal", "glass", "plastic", "ceramic"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000336681.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 298659, "question_id": "T9fvJTeEtsivkD579NwiYE", "question": "In what decade was this style of appliances popular?", "choices": ["twenties", "seventies", "thirties", "fifties"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000298659.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117013, "question_id": "TFubu2RYBoLyAmxbJ6tuE2", "question": "What do you commonly insert to gain more time on this meter?", "choices": ["coins", "receipt", "keys", "hand"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000117013.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 275082, "question_id": "TPNoCmu8PGCSUZjvi2KYYo", "question": "What is shown that is something very strongly associated with the 1960's?", "choices": ["peace sign", "skateboard", "arrow", "bench"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000275082.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 133177, "question_id": "TSAwEr8yJiBVFEToEXETcU", "question": "What type of lift is on these trucks that move the container upward?", "choices": ["gravity", "compression", "water powered", "hydraulic"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000133177.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 10663, "question_id": "TYbCLtdr5Rv6cCizh4vPuk", "question": "Turning the valves seen here which direction will yield water quicker?", "choices": ["out", "clockwise", "counter clockwise", "in"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000010663.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 171870, "question_id": "TiVjCCGxRfwcYnweQnjsSj", "question": "What will they do with the paper?", "choices": ["draw on it", "wrap sandwich", "cut it up", "throw away"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000171870.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268417, "question_id": "URUT8zMvvBJDu3uJC4ktzR", "question": "What is the tower in the background made to help direct during night?", "choices": ["planes", "cars", "boats", "birds"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000268417.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 558397, "question_id": "UuHMfFF7oyzCxYTtcNuf9N", "question": "What are the arranged cups being used for?", "choices": ["crafts", "organizing parts", "gaming", "drinking"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000558397.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 501076, "question_id": "V2GKDSNBVEWrhqbGcaQBom", "question": "How much tennis experience does the girl most likely have?", "choices": ["10 years", "5 years", "3 months", "3 years"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000501076.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221375, "question_id": "V5GVseSL8rgAAcQaweecW7", "question": "The hands of the clock most resemble which type of metal?", "choices": ["iron", "aluminum", "silver", "gold"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221375.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 78446, "question_id": "V6dPXXTY6yKXDBpnBmqoHV", "question": "The pattern on his tie is referred to as what?", "choices": ["tartan", "paisley", "polka dot", "houndstooth"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000078446.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 325136, "question_id": "VPyZJWD4tNcaebfwWP8nXZ", "question": "What type of skiing would this be called?", "choices": ["half pipe", "down hill", "slalom", "cross country"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000325136.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 565790, "question_id": "VXG5VHizaNA83NjkSCtPjD", "question": "Which one of these treats might they be making?", "choices": ["brownies", "chocolate", "wine", "smoothie"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000565790.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 399061, "question_id": "VtJvsUrKm5cdRyNHZZ7CGg", "question": "How are the numbers represented in this clock?", "choices": ["radio", "digital", "analog", "roman numerals"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000399061.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 306695, "question_id": "WBsmGhsQk6Sv8Go39T3qm9", "question": "What will they likely do with the stuff in the green container?", "choices": ["dip carrots", "throw away", "eat alone", "pour on rice"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000306695.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 512179, "question_id": "WUUoKUPjjH6Zda2Hu2d6jL", "question": "His attire implies that it is what season?", "choices": ["summer", "spring", "winter", "autumn"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000512179.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 280538, "question_id": "WXCxUU7hMk9iPqi5nUJ9A4", "question": "What structure does the structure on the right with the opening resemble most?", "choices": ["dead sea", "garden of eden", "stonehenge", "steamboat geyser"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000280538.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 108422, "question_id": "X6NxQK7dwa6dFDtZt5ZxBi", "question": "What sound would be most appropriate for the boy to make in this situation?", "choices": ["baa", "choo choo", "meow", "moo"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000108422.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 365280, "question_id": "X8HLqZcqMhM6dwUMV4Bi6S", "question": "Why would a city frown upon this skateboarder doing what he's doing?", "choices": ["damages property", "scares others", "steals from others", "might get hurt"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000365280.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 215415, "question_id": "XJN3SnNQcFThunyfdPKAvn", "question": "Why are the two children carrying identical red backpacks?", "choices": ["popularity", "cost", "fashion", "are family"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000215415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 323778, "question_id": "XMY4ierJ7MaXMBHrfGDWxS", "question": "Approximately how old is the phone the person is holding?", "choices": ["10 years", "25 years", "100 years", "brand new"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000323778.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 293455, "question_id": "XZiFCecpA9vtW39Pzo3GDh", "question": "What fruit would this shade of surfboard be compared to?", "choices": ["banana", "cherry", "lime", "mango"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000293455.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 435722, "question_id": "XhbyymDKQ2AAgykgdvxbGn", "question": "What does the advertiser on the bus hope one will do?", "choices": ["visit studio", "avoid studio", "buy studio", "work for studio"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000435722.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 478122, "question_id": "Xn7NkH8HJR6AGnKrDXcxRF", "question": "Which one of these appliances can you safely put this plate in?", "choices": ["microwave", "oven", "fridge", "toaster"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000478122.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 490364, "question_id": "Xw9VBNiRCFWo5nxuscWw3D", "question": "What will be attached to the long poles?", "choices": ["animals", "ladders", "good luck charms", "sails"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000490364.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 212886, "question_id": "Y4dAHrvtt7Bw43E9gQfdgS", "question": "The namesake of these types of apples was a what?", "choices": ["grandmother", "brother", "uncle", "father"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000212886.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 466654, "question_id": "Y5MSNr9cgwBXNvK2NSGJ7B", "question": "What is this photo representative of?", "choices": ["time lapse", "superimposition", "gaussian blur", "opacity"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000466654.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 232457, "question_id": "YEc28eKZBGkboLyamcsXFL", "question": "This cake is meant to express what?", "choices": ["congratulations", "gratitude", "sympathy", "well wishes"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000232457.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 396267, "question_id": "YJbpfh2Ki2Y673cy3pwANo", "question": "What is the purpose of the slatted object on the window?", "choices": ["warmth", "protection", "privacy", "light"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000396267.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 437645, "question_id": "YSHmy2eUrL7DKd5HNgb83v", "question": "What is the native language of this airline's CEO?", "choices": ["english", "japanese", "afrikaans", "lithuanian"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000437645.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 152584, "question_id": "Yc2zTdCnsAVKhapqpi4zn9", "question": "When did this area become a borough?", "choices": ["1620", "1207", "1923", "1756"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000152584.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 388175, "question_id": "YfredFvNqsrbhkHJc26oZk", "question": "Why is that message displaying on the front of bus?", "choices": ["shift over", "ac broken", "bus full", "antisocial"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000388175.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485565, "question_id": "YtRXAYTR3ncCtGXNC5FsKu", "question": "How would they say the number four in their language?", "choices": ["fire", "vier", "cuatro", "quatre"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485565.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 217545, "question_id": "ZJQ3d4FfUTdHo524nbQ5Kk", "question": "What type of nut is the main topping on the doughnut?", "choices": ["cashew", "almond", "pistachio", "peanut"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000217545.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 136003, "question_id": "ZefyukHBD4ojJ4AhfL44Ax", "question": "In what year was the first patent for this type of appliance issued?", "choices": ["1930", "1922", "1960", "1945"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000136003.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390707, "question_id": "ZguUoG4BKrHzAT5LXYS5aZ", "question": "What action is the man most likely about to take with the disk?", "choices": ["throw", "kick", "break", "drop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000390707.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 268377, "question_id": "ZwJxyUUTGCCFqvgYmb2f5B", "question": "Which item has no relationship to the eating process?", "choices": ["textured white", "blue and white", "silver", "black and white"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000268377.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 542085, "question_id": "ZxReeKCAuqHTcG8359C82Y", "question": "This appliance cooks with what type of flame?", "choices": ["gas", "pellets", "electric", "coal"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000542085.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 392101, "question_id": "aSvyw6fQw9UqXrNfAnFB4w", "question": "Who will issue a ticket for incorrect parking?", "choices": ["cop", "firefighter", "fellow driver", "judge"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000392101.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 351639, "question_id": "agrnGwUT4VvvLPsh7MJ4L9", "question": "What is the strap on the man's shoulder used for?", "choices": ["hold up pants", "umbrella", "carry camera", "fashion"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000351639.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 362307, "question_id": "ahEs6cbPuvxG7N8gsfF7Jm", "question": "Why is the bird on the keyboard?", "choices": ["cameraman placed", "resting", "is hungry", "hiding"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000362307.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 451907, "question_id": "an4LVAKbAnpxkz7xnWhHUC", "question": "The outfit that the teddy bear is wearing is known as a what?", "choices": ["gi", "kimono", "hanbok", "kebaya"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000451907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 201221, "question_id": "axUBiB3NkX44fkJoVGbBbu", "question": "What could the car do that might upset the giraffe?", "choices": ["turn off engine", "drive forward", "drive backward", "stay still"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000201221.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 264368, "question_id": "bCcqFdHmzHgoNgkkjAyKAb", "question": "What company is Jared affiliated with?", "choices": ["toyota", "youguys media", "debeers", "subway"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000264368.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192398, "question_id": "bEj9M4yftJ5uAAQj8Zs4Zw", "question": "That type of sash is usually worn by what?", "choices": ["martial artist", "political candidate", "rodeo participant", "pageant contestant"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000192398.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 44390, "question_id": "bgpGhpBrBhxtHTZLAWiACY", "question": "What is the common term for the object on top of the clock?", "choices": ["flag", "weather vane", "pointer", "compass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000044390.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 309760, "question_id": "bhTBhwDhwihuLg4wogLLub", "question": "That sign is encouraging what?", "choices": ["pay toll", "slow down", "watch for pedestrians", "caution"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000309760.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 532074, "question_id": "cDzxtJm9it4VNzjL8h32Bj", "question": "What are the triangular shaped areas on the tail end of the plane sometimes called?", "choices": ["fins", "wings", "repeaters", "stabilizers"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000532074.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 394530, "question_id": "ce2Kc7Z7pD7mcbaPzGgtvh", "question": "Why is the guy's right hand on the bottom of the racket?", "choices": ["unsure of move", "making mistake", "dominant hand", "hurt left hand"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000394530.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91346, "question_id": "cp2pBJRjzBnJS8BkT9WHLk", "question": "Which one of these is related to the white vegetable shown?", "choices": ["pumpkin", "tomato", "shallot", "ginger"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091346.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 79540, "question_id": "cumRHRYFbsSuH2EN3h5osH", "question": "Who is taking this picture?", "choices": ["professional photographer", "snowboarder", "ski lift rider", "drone"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000079540.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 577388, "question_id": "cydubx7KscZvFTkkBDTzr2", "question": "The elephant's face wrinkles reveal that it is what?", "choices": ["elderly", "middle aged", "baby", "adolescent"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000577388.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 401415, "question_id": "dKoXK3qndAkAfQe5zWDfiA", "question": "What type of bed does someone this age usually sleep in?", "choices": ["crib", "king", "twin", "queen"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000401415.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554907, "question_id": "dsbBmbDgTQg8rCvqWfmqXn", "question": "The dog on the surfboard is probably good at what?", "choices": ["diving", "boogie boarding", "swimming", "surfing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000554907.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 255712, "question_id": "dv8GN3T6q9Ft64DWPe8q7L", "question": "What should be used to protect his knees?", "choices": ["tape", "padded clothing", "knee pad", "helmet"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000255712.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 289556, "question_id": "dxk4o2WCp2dMLCjvqQ6nHu", "question": "This image showcases what cooking style?", "choices": ["molecular gastronomy", "sousvide", "poaching", "farm to table"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000289556.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 221389, "question_id": "e7cS386jueTeYJPEjQ7ams", "question": "The bottom layer is most likely what flavor?", "choices": ["vanilla", "rum", "strawberry", "chocolate"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000221389.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 358670, "question_id": "eJM9qH3giecwtmPvJE4qdA", "question": "How did this singer die?", "choices": ["boating accident", "pneumonia", "plane crash", "car crash"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000358670.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 327877, "question_id": "ea2NFkpAYzFArkvwsBy6CT", "question": "When did the current CEO of this technology company take over?", "choices": ["2001", "2020", "2005", "2010"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000327877.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 210673, "question_id": "euFdY6dN9Tv43aUZMRKTw8", "question": "What is the activity the man is performing called?", "choices": ["windsurfing", "parasailing", "wave riding", "surfing"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000210673.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 131873, "question_id": "f9AgSWdVdW3L8aNWqoSvCP", "question": "What is the horns on this animal called?", "choices": ["ossicones", "tuffs", "ears", "snouts"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000131873.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 494277, "question_id": "fC3jo35sLJmRFMs7KztG7Q", "question": "What is the white substance present on the walls of the kitchen?", "choices": ["joint compound", "glue", "plaster", "cement"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000494277.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 272347, "question_id": "gdRZYrSHv3TSZDRtmZt3UF", "question": "What day of the week is mentioned first on the poster?", "choices": ["saturday", "wednesday", "sunday", "tuesday"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000272347.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 510708, "question_id": "gswXvaXsqp9fm9EH4Rfp3n", "question": "What is a drink that can be made from the packet labelled Rajah?", "choices": ["herbal tea", "tea", "coffee", "hot chocolate"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000510708.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 112216, "question_id": "gwAyb3zZ7AoGHNmJGB6fNv", "question": "What might cause an odd flavor that was added to the food?", "choices": ["salt", "seasoning", "butter", "ketchup"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000112216.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427042, "question_id": "h9kGwfovcsp6rVtQmXUuLg", "question": "The props on the planes shown here are off what will happen to the planes now?", "choices": ["rise violently", "reverse course", "retain altitude", "free fall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427042.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 574756, "question_id": "hKEkSWJHk4g8Vt7WgDotew", "question": "Where is the safest place for the surfer to strap his board to himself?", "choices": ["wrist", "waist", "ankle", "neck"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000574756.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 569601, "question_id": "hRdeMnxg2wPdbpcbSC2FBW", "question": "The furnishings in this home could be described as what?", "choices": ["modern", "exotic", "cutting edge", "outdated"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000569601.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 177043, "question_id": "hVpVhUnkcuiBKHueXknwei", "question": "What tree does the insignia symbolize?", "choices": ["pine", "cherry", "oak", "maple"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000177043.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 427678, "question_id": "hXwSKkcWUGj4fpKsxZmsCb", "question": "What language is most likely spoken in the country where the clock is located?", "choices": ["english", "italian", "russian", "greek"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000427678.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 315953, "question_id": "hehxpSyK3FDc2SXyJXGSy5", "question": "Why does the man on the surfboard have no visible feet?", "choices": ["rendering error", "artist mistake", "design choice", "clipping error"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000315953.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 47712, "question_id": "iTTKMVnV5i7XxCd3ESa3MX", "question": "What type of cat is shown here?", "choices": ["persian", "chinese hairless", "siamese", "tabby"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000047712.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 554999, "question_id": "iVvbors2WN9ZsAYwebDckY", "question": "This lists a TLD from what country?", "choices": ["kuwait", "kenya", "kosovo", "korea"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000554999.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 549803, "question_id": "inwQvc3YrFEpGVaDQXywFa", "question": "What does the 21 sign next to the door indicate this business serves?", "choices": ["alcohol", "burgers", "sushi", "thai chi"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000549803.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135830, "question_id": "isBve2R7JhfPbD28T2tHxY", "question": "The man is reading in which location?", "choices": ["van", "public bus", "light rail", "train"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135830.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 540243, "question_id": "j2RrDtBCUiibQLWEn8a3PL", "question": "What section of the supermarket would one find the ingredients to make this item?", "choices": ["baking", "international", "produce", "household"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000540243.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 207435, "question_id": "j89q7aVGaUPjQi5zRhrd4Q", "question": "What is missing from the electrical outlets?", "choices": ["electrical plug", "wall plate", "copper wiring", "bracket"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000207435.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 340606, "question_id": "jPQCHoBQL4yFpFKnbv9ujb", "question": "Which one of these might be able to make the decorative piece that hangs over the dogs?", "choices": ["electrician", "welder", "engineer", "carpenter"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000340606.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 194160, "question_id": "jTNGgMmXkgph8Av6XTdoFW", "question": "In which arrondissement is this airline's vaccination center?", "choices": ["7th", "1st", "3rd", "10th"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000194160.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 463381, "question_id": "jTztUBBwsq3vZFoAACkC3p", "question": "What is needed to make Ramsay and permanent sign?", "choices": ["glue", "pedastal", "wire", "pole"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000463381.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 283592, "question_id": "jVwE3a4T2roNN5EEUm3LZY", "question": "What type of light fixture is located above the sink?", "choices": ["chandelier", "globe", "track", "recessed"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000283592.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 198744, "question_id": "jtztSEhu38VVs5FGkzSkKC", "question": "The pillars in the ground are marking what?", "choices": ["mines", "treasure", "dog traps", "graves"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000198744.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 78368, "question_id": "jw3yw27GnW7WxuUKeZ2DRc", "question": "What lake is the city on?", "choices": ["ontario", "superior", "huron", "michigan"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000078368.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 50866, "question_id": "jx7yMuCh8iXtwYYSbZ9fAQ", "question": "What is the name for meat that has been treated this way?", "choices": ["jumped", "pushed", "pulled", "shoved"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000050866.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 117938, "question_id": "jyb2qzEV28gmWNKqtgt5zo", "question": "Why is the wire a part of the fence?", "choices": ["keep animals in", "keep away thieves", "keep away birds", "mark territory"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000117938.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 485110, "question_id": "k8GFY54bknTSaWp6vP4ZLQ", "question": "What activity would get us in trouble here?", "choices": ["right turn", "going straight", "turn left", "walking across"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000485110.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 186905, "question_id": "kYMbnwKeWuh2iLe4mzKPuP", "question": "What most likely happened here recently?", "choices": ["fire", "tornado", "flood", "earthquake"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000186905.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 156575, "question_id": "kZKVsXM9BkNMoBXdXCradZ", "question": "Around how fast is the max speed of the paddle board?", "choices": ["18 mph", "28 mph", "8 mph", "44 mph"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000156575.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 323292, "question_id": "kdspgEPZzwDyXPR8DMd53R", "question": "Why is the plant in the image light yellow?", "choices": ["bird urinated", "plant is dead", "different plant species", "sun glare"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000323292.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 551557, "question_id": "kucCudL2Yr5ZehLhCU9Hv9", "question": "What makes it safe for pedestrians to cross the train tracks while the train is moving?", "choices": ["blockers", "brick wall", "bridge", "train stop"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000551557.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 415285, "question_id": "mFfmbYTTZqapkesUB5MbWj", "question": "What kind of creature is Calvin's friend that is mentioned?", "choices": ["tiger", "bear", "elephant", "dog"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000415285.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 440242, "question_id": "mKXEfmRND594n4cogS8RfZ", "question": "What's wrong with this clock?", "choices": ["being rusty", "inaccurate reading", "low battery", "falling"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000440242.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 135792, "question_id": "mrBXcjQmnP6QyXGwqUW8kt", "question": "What language is the alphabet in that's watermarked on the image?", "choices": ["latin", "spanish", "arabic", "english"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000135792.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 192906, "question_id": "n3ErtgsyVMHbYiBcvkm8zK", "question": "What is black device on the window sill with the LCD screen measuring?", "choices": ["temperature", "air pressure", "humidity", "light"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000192906.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 390054, "question_id": "nFEqDrcEXDoV2WV3vnak5p", "question": "These people are dressed as pieces from what popular toy set?", "choices": ["lite brite", "lincoln log", "barbie", "lego"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000390054.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 360360, "question_id": "nJJzwiqU2ME3pMzj5tDLUy", "question": "What fruit does his toy resemble?", "choices": ["pomegranate", "orange", "watermelon", "cherry"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000360360.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 271901, "question_id": "nMAp4T9iekt9WT6faaYFAg", "question": "Whom is the yellow plate on the sidewalk helpful for?", "choices": ["walker users", "deaf folks", "blind folks", "wheelchair users"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000271901.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 160697, "question_id": "naaweymtLUrNyQz5ymwcZx", "question": "Which one of these would be common among this type of cat?", "choices": ["cold", "loneliness", "matting", "heart attack"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000160697.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 174047, "question_id": "nqfDHiJiHm47BeRFixREvE", "question": "What is the French name of the accent mark in the second word?", "choices": ["cedille", "trema", "circonflexe", "grave"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000174047.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 92473, "question_id": "o9XPAVBgom6LZBYbpsGtpM", "question": "What decade was this bus probably made in?", "choices": ["1950", "2010", "1990", "2020"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000092473.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 89887, "question_id": "oVGpoumwUM8AzpgiEDFo7d", "question": "They are simulating a what using the teddy bear?", "choices": ["ski lift", "bungee jump", "rodeo", "zip line"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000089887.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 91205, "question_id": "5bNXVcS8NkjmJYFNPcE4m8", "question": "What time of day is it?", "choices": ["night", "morning", "evening", "afternoon"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000091205.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 555902, "question_id": "9bL6gcWpbT6V4ExRBo5eCy", "question": "What is most unnatural here?", "choices": ["lamb nose", "lamb's coat", "nothing", "ewe"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000555902.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 188783, "question_id": "DDKZ8jAZH68xkz2mfGNmnA", "question": "What is the folk name for the green vegetable?", "choices": ["leprechaun tree", "sparrow grass", "fairy tree", "gnome grass"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000188783.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 519658, "question_id": "NSwta5YF6BivNKbbmSL77C", "question": "What action likely happened a few seconds before?", "choices": ["throw", "bite", "slash", "fall"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000519658.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 359460, "question_id": "XXtfsPBhyDdvQVMehsarvM", "question": "Which woman won this tournament in 2021?", "choices": ["williams", "osaka", "fernandez", "raducanu"], "difficult_direct_answer": false, "image": "test2015/COCO_test2015_000000359460.jpg", "dataset": "aokvqa"}, {"split": "test", "image_id": 142957, "question_id": "mxAPu44kjeW2qAGNovYZMs", "question": "Why is the girl above the water?", "choices": ["flotation device", "current", "levitation", "dolphins"], "difficult_direct_answer": true, "image": "test2015/COCO_test2015_000000142957.jpg", "dataset": "aokvqa"}]