def search_images_ddg(key, max_n=200):
    """Search DuckDuckGo for `key` and return up to `max_n` unique image URLs.

    The function first POSTs the query to the DuckDuckGo front page to obtain
    the `vqd` search token, then pages through the `i.js` JSON endpoint,
    following each response's `next` link until enough URLs were gathered or
    no further page is offered.

    This is a best-effort helper: on any error (network failure, unexpected
    JSON layout, ...) a message is printed and the URLs collected so far are
    returned instead of raising.

    Args:
        key: Search term.
        max_n: Maximum number of unique image URLs to return.

    Returns:
        An `L` of unique image URLs in the order they were found (possibly
        empty, never `None`).
    """
    base_url = 'https://duckduckgo.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0'}
    timeout = 10  # seconds; without it a stalled connection blocks forever

    # A dict is used as an insertion-ordered set. It is created *before* the
    # try block so the fall-through return below can always read it.
    found = {}
    try:
        res = requests.post(base_url, data={'q': key}, timeout=timeout)
        token = re.search(r'vqd=([\d-]+)&', res.text)
        if not token:
            print('Token Parsing Failed! Unable to retrieve search token.')
            return L([])

        request_url = base_url + 'i.js'
        params = (('l', 'us-en'), ('o', 'json'), ('q', key), ('vqd', token.group(1)),
                  ('f', ',,,'), ('p', '1'), ('v7exp', 'a'))

        while len(found) < max_n:
            res = requests.get(request_url, headers=headers, params=params, timeout=timeout)
            data = json.loads(res.text)

            n_before = len(found)
            for obj in data['results']:
                found[obj['image']] = None
                if len(found) >= max_n:
                    break

            # Stop when there is no further page, or when a page contributed
            # nothing new (guards against cycling through repeated results).
            if 'next' not in data or len(found) == n_before:
                break
            request_url = base_url + data['next']
    except Exception as e:
        # Deliberately broad: report the problem and fall through so the
        # caller still gets whatever was gathered before the failure.
        print(f"An error occurred: {e}")
    return L(list(found))