summary | refs | log | tree | commit | diff
path: root/searx/results.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/results.py')
-rw-r--r-- searx/results.py 68
1 files changed, 31 insertions, 37 deletions
diff --git a/searx/results.py b/searx/results.py
index 02ab9efb1..e4cad2e24 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -1,13 +1,11 @@
import re
-import sys
from collections import defaultdict
from operator import itemgetter
from threading import RLock
+from urllib.parse import urlparse, unquote
+from searx import logger
from searx.engines import engines
-from searx.url_utils import urlparse, unquote
-if sys.version_info[0] == 3:
- basestring = str
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@@ -15,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
# return the meaningful length of the content for a result
def result_content_len(content):
- if isinstance(content, basestring):
+ if isinstance(content, str):
return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
else:
return 0
@@ -60,6 +58,8 @@ def merge_two_infoboxes(infobox1, infobox2):
if weight2 > weight1:
infobox1['engine'] = infobox2['engine']
+ infobox1['engines'] |= infobox2['engines']
+
if 'urls' in infobox2:
urls1 = infobox1.get('urls', None)
if urls1 is None:
@@ -122,72 +122,65 @@ def result_score(result):
return sum((occurences * weight) / position for position in result['positions'])
-class ResultContainer(object):
+class ResultContainer:
"""docstring for ResultContainer"""
+ __slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\
+ '_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url'
+
def __init__(self):
- super(ResultContainer, self).__init__()
- self.results = defaultdict(list)
+ super().__init__()
self._merged_results = []
self.infoboxes = []
self.suggestions = set()
- self.answers = set()
+ self.answers = {}
self.corrections = set()
self._number_of_results = []
self._ordered = False
self.paging = False
self.unresponsive_engines = set()
self.timings = []
+ self.redirect_url = None
def extend(self, engine_name, results):
+ standard_result_count = 0
for result in list(results):
result['engine'] = engine_name
if 'suggestion' in result:
self.suggestions.add(result['suggestion'])
- results.remove(result)
elif 'answer' in result:
- self.answers.add(result['answer'])
- results.remove(result)
+ self.answers[result['answer']] = result
elif 'correction' in result:
self.corrections.add(result['correction'])
- results.remove(result)
elif 'infobox' in result:
self._merge_infobox(result)
- results.remove(result)
elif 'number_of_results' in result:
self._number_of_results.append(result['number_of_results'])
- results.remove(result)
+ else:
+ # standard result (url, title, content)
+ if 'url' in result and not isinstance(result['url'], str):
+ logger.debug('result: invalid URL: %s', str(result))
+ elif 'title' in result and not isinstance(result['title'], str):
+ logger.debug('result: invalid title: %s', str(result))
+ elif 'content' in result and not isinstance(result['content'], str):
+ logger.debug('result: invalid content: %s', str(result))
+ else:
+ self._merge_result(result, standard_result_count + 1)
+ standard_result_count += 1
if engine_name in engines:
with RLock():
engines[engine_name].stats['search_count'] += 1
- engines[engine_name].stats['result_count'] += len(results)
-
- if not results:
- return
+ engines[engine_name].stats['result_count'] += standard_result_count
- self.results[engine_name].extend(results)
-
- if not self.paging and engine_name in engines and engines[engine_name].paging:
+ if not self.paging and standard_result_count > 0 and engine_name in engines\
+ and engines[engine_name].paging:
self.paging = True
- for i, result in enumerate(results):
- if 'url' in result and not isinstance(result['url'], basestring):
- continue
- try:
- result['url'] = result['url'].decode('utf-8')
- except:
- pass
- if 'title' in result and not isinstance(result['title'], basestring):
- continue
- if 'content' in result and not isinstance(result['content'], basestring):
- continue
- position = i + 1
- self._merge_result(result, position)
-
def _merge_infobox(self, infobox):
add_infobox = True
infobox_id = infobox.get('id', None)
+ infobox['engines'] = set([infobox['engine']])
if infobox_id is not None:
parsed_url_infobox_id = urlparse(infobox_id)
for existingIndex in self.infoboxes:
@@ -346,7 +339,8 @@ class ResultContainer(object):
return resultnum_sum / len(self._number_of_results)
def add_unresponsive_engine(self, engine_name, error_type, error_message=None):
- self.unresponsive_engines.add((engine_name, error_type, error_message))
+ if engines[engine_name].display_error_messages:
+ self.unresponsive_engines.add((engine_name, error_type, error_message))
def add_timing(self, engine_name, engine_time, page_load_time):
self.timings.append({