diff options
| -rw-r--r-- | searx/search.py | 69 |
1 files changed, 36 insertions, 33 deletions
diff --git a/searx/search.py b/searx/search.py index 5e036ccca..d1d03805f 100644 --- a/searx/search.py +++ b/searx/search.py @@ -19,7 +19,6 @@ import requests as requests_lib import threading import re from itertools import izip_longest, chain -from datetime import datetime from operator import itemgetter from Queue import Queue from time import time @@ -35,16 +34,31 @@ from searx.query import Query number_of_searches = 0 +def search_request_wrapper(fn, url, engine_name, **kwargs): + try: + return fn(url, **kwargs) + except Exception, e: + # increase errors stats + engines[engine_name].stats['errors'] += 1 + + # print engine name and specific error message + print('[E] Error with engine "{0}":\n\t{1}'.format( + engine_name, str(e))) + return + + def threaded_requests(requests): timeout_limit = max(r[2]['timeout'] for r in requests) search_start = time() - for fn, url, request_args in requests: + for fn, url, request_args, engine_name in requests: + request_args['timeout'] = timeout_limit th = threading.Thread( - target=fn, - args=(url,), + target=search_request_wrapper, + args=(fn, url, engine_name), kwargs=request_args, name='search_request', ) + th._engine_name = engine_name th.start() for th in threading.enumerate(): @@ -52,7 +66,7 @@ def threaded_requests(requests): remaining_time = max(0.0, timeout_limit - (time() - search_start)) th.join(remaining_time) if th.isAlive(): - print('engine timeout') + print('engine timeout: {0}'.format(th._engine_name)) @@ -63,30 +77,23 @@ def default_request_params(): # create a callback wrapper for the search engine results -def make_callback(engine_name, - results_queue, - suggestions, - answers, - infoboxes, - callback, - params): +def make_callback(engine_name, results_queue, callback, params): # creating a callback wrapper for the search engine results def process_callback(response, **kwargs): response.search_params = params - # callback - try: - search_results = callback(response) - except Exception, e: - # increase errors stats + timeout_overhead = 0.2 # seconds + search_duration = time() - params['started'] + timeout_limit = engines[engine_name].timeout + timeout_overhead + if search_duration > timeout_limit: + engines[engine_name].stats['page_load_time'] += timeout_limit engines[engine_name].stats['errors'] += 1 - - # print engine name and specific error message - print '[E] Error with engine "{0}":\n\t{1}'.format( - engine_name, str(e)) return + # callback + search_results = callback(response) + # add results for result in search_results: result['engine'] = engine_name @@ -94,8 +101,7 @@ def make_callback(engine_name, results_queue.put_nowait((engine_name, search_results)) # update stats with current page-load-time - engines[engine_name].stats['page_load_time'] += \ - (datetime.now() - params['started']).total_seconds() + engines[engine_name].stats['page_load_time'] += search_duration return process_callback @@ -408,6 +414,7 @@ class Search(object): # init vars requests = [] results_queue = Queue() + results = {} suggestions = set() answers = set() infoboxes = [] @@ -439,14 +446,13 @@ class Search(object): request_params = default_request_params() request_params['headers']['User-Agent'] = user_agent request_params['category'] = selected_engine['category'] - request_params['started'] = datetime.now() + request_params['started'] = time() request_params['pageno'] = self.pageno request_params['language'] = self.lang # update request parameters dependent on # search-engine (contained in engines folder) - request_params = engine.request(self.query.encode('utf-8'), - request_params) + engine.request(self.query.encode('utf-8'), request_params) if request_params['url'] is None: # TODO add support of offline engines @@ -456,12 +462,8 @@ class Search(object): callback = make_callback( selected_engine['name'], results_queue, - suggestions, - answers, - infoboxes, engine.response, - request_params - ) + request_params) # create dictionary which contain all # informations about the request @@ -485,12 +487,13 @@ class Search(object): continue # append request to list - requests.append((req, request_params['url'], request_args)) + requests.append((req, request_params['url'], request_args, selected_engine['name'])) + if not requests: + return results, suggestions, answers, infoboxes # send all search-request threaded_requests(requests) - results = {} while not results_queue.empty(): engine_name, engine_results = results_queue.get_nowait() |