summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- searx/search.py 69
1 files changed, 36 insertions, 33 deletions
diff --git a/searx/search.py b/searx/search.py
index 5e036ccca..d1d03805f 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -19,7 +19,6 @@ import requests as requests_lib
import threading
import re
from itertools import izip_longest, chain
-from datetime import datetime
from operator import itemgetter
from Queue import Queue
from time import time
@@ -35,16 +34,31 @@ from searx.query import Query
number_of_searches = 0
+def search_request_wrapper(fn, url, engine_name, **kwargs):
+ try:
+ return fn(url, **kwargs)
+ except Exception, e:
+ # increase errors stats
+ engines[engine_name].stats['errors'] += 1
+
+ # print engine name and specific error message
+ print('[E] Error with engine "{0}":\n\t{1}'.format(
+ engine_name, str(e)))
+ return
+
+
def threaded_requests(requests):
timeout_limit = max(r[2]['timeout'] for r in requests)
search_start = time()
- for fn, url, request_args in requests:
+ for fn, url, request_args, engine_name in requests:
+ request_args['timeout'] = timeout_limit
th = threading.Thread(
- target=fn,
- args=(url,),
+ target=search_request_wrapper,
+ args=(fn, url, engine_name),
kwargs=request_args,
name='search_request',
)
+ th._engine_name = engine_name
th.start()
for th in threading.enumerate():
@@ -52,7 +66,7 @@ def threaded_requests(requests):
remaining_time = max(0.0, timeout_limit - (time() - search_start))
th.join(remaining_time)
if th.isAlive():
- print('engine timeout')
+ print('engine timeout: {0}'.format(th._engine_name))
@@ -63,30 +77,23 @@ def default_request_params():
# create a callback wrapper for the search engine results
-def make_callback(engine_name,
- results_queue,
- suggestions,
- answers,
- infoboxes,
- callback,
- params):
+def make_callback(engine_name, results_queue, callback, params):
# creating a callback wrapper for the search engine results
def process_callback(response, **kwargs):
response.search_params = params
- # callback
- try:
- search_results = callback(response)
- except Exception, e:
- # increase errors stats
+ timeout_overhead = 0.2 # seconds
+ search_duration = time() - params['started']
+ timeout_limit = engines[engine_name].timeout + timeout_overhead
+ if search_duration > timeout_limit:
+ engines[engine_name].stats['page_load_time'] += timeout_limit
engines[engine_name].stats['errors'] += 1
-
- # print engine name and specific error message
- print '[E] Error with engine "{0}":\n\t{1}'.format(
- engine_name, str(e))
return
+ # callback
+ search_results = callback(response)
+
# add results
for result in search_results:
result['engine'] = engine_name
@@ -94,8 +101,7 @@ def make_callback(engine_name,
results_queue.put_nowait((engine_name, search_results))
# update stats with current page-load-time
- engines[engine_name].stats['page_load_time'] += \
- (datetime.now() - params['started']).total_seconds()
+ engines[engine_name].stats['page_load_time'] += search_duration
return process_callback
@@ -408,6 +414,7 @@ class Search(object):
# init vars
requests = []
results_queue = Queue()
+ results = {}
suggestions = set()
answers = set()
infoboxes = []
@@ -439,14 +446,13 @@ class Search(object):
request_params = default_request_params()
request_params['headers']['User-Agent'] = user_agent
request_params['category'] = selected_engine['category']
- request_params['started'] = datetime.now()
+ request_params['started'] = time()
request_params['pageno'] = self.pageno
request_params['language'] = self.lang
# update request parameters dependent on
# search-engine (contained in engines folder)
- request_params = engine.request(self.query.encode('utf-8'),
- request_params)
+ engine.request(self.query.encode('utf-8'), request_params)
if request_params['url'] is None:
# TODO add support of offline engines
@@ -456,12 +462,8 @@ class Search(object):
callback = make_callback(
selected_engine['name'],
results_queue,
- suggestions,
- answers,
- infoboxes,
engine.response,
- request_params
- )
+ request_params)
# create dictionary which contain all
# informations about the request
@@ -485,12 +487,13 @@ class Search(object):
continue
# append request to list
- requests.append((req, request_params['url'], request_args))
+ requests.append((req, request_params['url'], request_args, selected_engine['name']))
+ if not requests:
+ return results, suggestions, answers, infoboxes
# send all search-request
threaded_requests(requests)
- results = {}
while not results_queue.empty():
engine_name, engine_results = results_queue.get_nowait()