diff options
| -rw-r--r-- | AUTHORS.rst | 1 | ||||
| -rw-r--r-- | searx/engines/__init__.py | 16 | ||||
| -rw-r--r-- | searx/engines/piratebay.py | 2 | ||||
| -rw-r--r-- | searx/search.py | 61 | ||||
| -rw-r--r-- | searx/settings.yml | 6 | ||||
| -rw-r--r-- | searx/templates/courgette/result_templates/default.html | 2 | ||||
| -rw-r--r-- | searx/templates/courgette/result_templates/images.html | 2 | ||||
| -rw-r--r-- | searx/templates/courgette/result_templates/map.html | 2 | ||||
| -rw-r--r-- | searx/templates/courgette/result_templates/videos.html | 6 | ||||
| -rw-r--r-- | searx/templates/courgette/results.html | 6 | ||||
| -rw-r--r-- | searx/templates/default/infobox.html | 2 | ||||
| -rw-r--r-- | searx/templates/default/result_templates/default.html | 2 | ||||
| -rw-r--r-- | searx/templates/default/result_templates/images.html | 2 | ||||
| -rw-r--r-- | searx/templates/default/result_templates/map.html | 2 | ||||
| -rw-r--r-- | searx/templates/default/result_templates/torrent.html | 5 | ||||
| -rw-r--r-- | searx/templates/default/result_templates/videos.html | 6 | ||||
| -rw-r--r-- | searx/templates/default/results.html | 2 | ||||
| -rw-r--r-- | searx/tests/test_webapp.py | 2 |
18 files changed, 69 insertions, 58 deletions
diff --git a/AUTHORS.rst b/AUTHORS.rst index dacb45923..ef1ae7809 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -28,3 +28,4 @@ generally made searx better: - @courgette - @kernc - @Cqoicebordel +- @Reventl0v diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 80356a8cd..3c9ce3b57 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -41,11 +41,8 @@ def load_module(filename): module.name = modname return module -if 'engines' not in settings or not settings['engines']: - print '[E] Error no engines found. Edit your settings.yml' - exit(2) -for engine_data in settings['engines']: +def load_engine(engine_data): engine_name = engine_data['engine'] engine = load_module(engine_name + '.py') @@ -87,7 +84,6 @@ for engine_data in settings['engines']: print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr) # noqa sys.exit(1) - engines[engine.name] = engine engine.stats = { 'result_count': 0, 'search_count': 0, @@ -105,6 +101,7 @@ for engine_data in settings['engines']: if engine.shortcut: # TODO check duplications engine_shortcuts[engine.shortcut] = engine.name + return engine def get_engines_stats(): @@ -194,3 +191,12 @@ def get_engines_stats(): sorted(errors, key=itemgetter('avg'), reverse=True) ), ] + + +if 'engines' not in settings or not settings['engines']: + print '[E] Error no engines found. Edit your settings.yml' + exit(2) + +for engine_data in settings['engines']: + engine = load_engine(engine_data) + engines[engine.name] = engine diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index 14905dc83..f6144faa2 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -19,7 +19,7 @@ categories = ['videos', 'music', 'files'] paging = True # search-url -url = 'https://thepiratebay.se/' +url = 'https://thepiratebay.cr/' search_url = url + 'search/{search_term}/{pageno}/99/{search_type}' # piratebay specific type-definitions diff --git a/searx/search.py b/searx/search.py index 5b5cc6200..4058cba20 100644 --- a/searx/search.py +++ b/searx/search.py @@ -21,6 +21,8 @@ import re from itertools import izip_longest, chain from datetime import datetime from operator import itemgetter +from Queue import Queue +from time import time from urlparse import urlparse, unquote from searx.engines import ( categories, engines @@ -34,6 +36,8 @@ number_of_searches = 0 def threaded_requests(requests): + timeout_limit = max(r[2]['timeout'] for r in requests) + search_start = time() for fn, url, request_args in requests: th = threading.Thread( target=fn, @@ -45,7 +49,11 @@ def threaded_requests(requests): for th in threading.enumerate(): if th.name == 'search_request': - th.join() + remaining_time = max(0.0, timeout_limit - (time() - search_start)) + th.join(remaining_time) + if th.isAlive(): + print('engine timeout') + # get default reqest parameter @@ -56,7 +64,7 @@ def default_request_params(): # create a callback wrapper for the search engine results def make_callback(engine_name, - results, + results_queue, suggestions, answers, infoboxes, @@ -65,7 +73,6 @@ def make_callback(engine_name, # creating a callback wrapper for the search engine results def process_callback(response, **kwargs): - cb_res = [] response.search_params = params # callback @@ -74,7 +81,6 @@ def make_callback(engine_name, except Exception, e: # increase errors stats engines[engine_name].stats['errors'] += 1 - results[engine_name] = cb_res # print engine name and specific error message print '[E] Error with engine "{0}":\n\t{1}'.format( @@ -85,26 +91,7 @@ def make_callback(engine_name, for result in search_results: result['engine'] = engine_name - # if it is a suggestion, add it to list of suggestions - if 'suggestion' in result: - # TODO type checks - suggestions.add(result['suggestion']) - continue - - # if it is an answer, add it to list of answers - if 'answer' in result: - answers.add(result['answer']) - continue - - # if it is an infobox, add it to list of infoboxes - if 'infobox' in result: - infoboxes.append(result) - continue - - # append result - cb_res.append(result) - - results[engine_name] = cb_res + results_queue.put_nowait((engine_name, search_results)) # update stats with current page-load-time engines[engine_name].stats['page_load_time'] += \ @@ -420,7 +407,7 @@ class Search(object): # init vars requests = [] - results = {} + results_queue = Queue() suggestions = set() answers = set() infoboxes = [] @@ -468,7 +455,7 @@ class Search(object): # create a callback wrapper for the search engine results callback = make_callback( selected_engine['name'], - results, + results_queue, suggestions, answers, infoboxes, @@ -502,6 +489,28 @@ class Search(object): # send all search-request threaded_requests(requests) + results = {} + + while not results_queue.empty(): + engine_name, engine_results = results_queue.get_nowait() + + # TODO type checks + [suggestions.add(x['suggestion']) + for x in list(engine_results) + if 'suggestion' in x + and engine_results.remove(x) is None] + + [answers.add(x['answer']) + for x in list(engine_results) + if 'answer' in x + and engine_results.remove(x) is None] + + infoboxes.extend(x for x in list(engine_results) + if 'infobox' in x + and engine_results.remove(x) is None) + + results[engine_name] = engine_results + # update engine-specific stats for engine_name, engine_results in results.items(): engines[engine_name].stats['search_count'] += 1 diff --git a/searx/settings.yml b/searx/settings.yml index 8f89c01d1..b51b37f1c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -95,9 +95,9 @@ engines: engine : openstreetmap shortcut : osm - - name : piratebay - engine : piratebay - shortcut : tpb +# - name : piratebay +# engine : piratebay +# shortcut : tpb - name : kickass engine : kickass diff --git a/searx/templates/courgette/result_templates/default.html b/searx/templates/courgette/result_templates/default.html index 734f9066c..5a8a78198 100644 --- a/searx/templates/courgette/result_templates/default.html +++ b/searx/templates/courgette/result_templates/default.html @@ -1,7 +1,7 @@ <div class="result {{ result.class }}"> {% if result['favicon'] %} - <img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" /> + <img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}" /> {% endif %} <div> diff --git a/searx/templates/courgette/result_templates/images.html b/searx/templates/courgette/result_templates/images.html index 1f15ff2bb..ebda5380b 100644 --- a/searx/templates/courgette/result_templates/images.html +++ b/searx/templates/courgette/result_templates/images.html @@ -1,6 +1,6 @@ <div class="image_result"> <p> - <a href="{{ result.img_src }}"><img src="{{ result.img_src }}" title={{ result.title }}/></a> + <a href="{{ result.img_src }}"><img src="{{ result.img_src }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> <span class="url"><a href="{{ result.url }}" class="small_font">original context</a></span> </p> </div> diff --git a/searx/templates/courgette/result_templates/map.html b/searx/templates/courgette/result_templates/map.html index 734f9066c..5a8a78198 100644 --- a/searx/templates/courgette/result_templates/map.html +++ b/searx/templates/courgette/result_templates/map.html @@ -1,7 +1,7 @@ <div class="result {{ result.class }}"> {% if result['favicon'] %} - <img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" /> + <img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}" /> {% endif %} <div> diff --git a/searx/templates/courgette/result_templates/videos.html b/searx/templates/courgette/result_templates/videos.html index 8ceb0b180..c8d1a39b6 100644 --- a/searx/templates/courgette/result_templates/videos.html +++ b/searx/templates/courgette/result_templates/videos.html @@ -1,12 +1,10 @@ <div class="result"> {% if result['favicon'] %} - <img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" /> + <img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}" /> {% endif %} - <p> <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> {% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %} - <a href="{{ result.url }}"><img width="400px" src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a> + <a href="{{ result.url }}"><img width="400" src="{{ result.thumbnail }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> <p class="url">{{ result.url }}</p> - </p> </div> diff --git a/searx/templates/courgette/results.html b/searx/templates/courgette/results.html index d0b53b48a..62bef8c90 100644 --- a/searx/templates/courgette/results.html +++ b/searx/templates/courgette/results.html @@ -10,7 +10,7 @@ <div id="search_url"> {{ _('Search URL') }}: - <input type="text" value="{{ base_url }}?q={{ q|urlencode }}&pageno={{ pageno }}{% if selected_categories %}&category_{{ selected_categories|join("&category_") }}{% endif %}" readonly="" /> + <input type="text" value="{{ base_url }}?q={{ q|urlencode }}&pageno={{ pageno }}{% if selected_categories %}&category_{{ selected_categories|join("&category_")|replace(' ','+') }}{% endif %}" readonly /> </div> <div id="apis"> {{ _('Download results') }} @@ -43,9 +43,9 @@ {% for result in results %} {% if result['template'] %} - {% include 'default/result_templates/'+result['template'] %} + {% include 'courgette/result_templates/'+result['template'] %} {% else %} - {% include 'default/result_templates/default.html' %} + {% include 'courgette/result_templates/default.html' %} {% endif %} {% endfor %} diff --git a/searx/templates/default/infobox.html b/searx/templates/default/infobox.html index d03b008f9..d3ff8f06d 100644 --- a/searx/templates/default/infobox.html +++ b/searx/templates/default/infobox.html @@ -1,6 +1,6 @@ <div class="infobox"> <h2>{{ infobox.infobox }}</h2> - {% if infobox.img_src %}<img src="{{ infobox.img_src }}" />{% endif %} + {% if infobox.img_src %}<img src="{{ infobox.img_src }}" title="{{ infobox.infobox|striptags }}" alt="{{ infobox.infobox|striptags }}" />{% endif %} <p>{{ infobox.entity }}</p> <p>{{ infobox.content | safe }}</p> {% if infobox.attributes %} diff --git a/searx/templates/default/result_templates/default.html b/searx/templates/default/result_templates/default.html index d0e725ac5..c41c58fa1 100644 --- a/searx/templates/default/result_templates/default.html +++ b/searx/templates/default/result_templates/default.html @@ -1,5 +1,5 @@ <div class="result {{ result.class }}"> - <h3 class="result_title"> {% if result['favicon'] %}<img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" />{% endif %}<a href="{{ result.url }}">{{ result.title|safe }}</a></h3> + <h3 class="result_title"> {% if result['favicon'] %}<img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}" />{% endif %}<a href="{{ result.url }}">{{ result.title|safe }}</a></h3> <p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a></p> {% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %} <p class="content">{% if result.img_src %}<img src="{{ result.img_src }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> diff --git a/searx/templates/default/result_templates/images.html b/searx/templates/default/result_templates/images.html index bead78c48..4c6d59e01 100644 --- a/searx/templates/default/result_templates/images.html +++ b/searx/templates/default/result_templates/images.html @@ -1,6 +1,6 @@ <div class="image_result"> <p> - <a href="{{ result.img_src }}"><img src="{{ result.img_src }}" title="{{ result.title }}"/></a> + <a href="{{ result.img_src }}"><img src="{{ result.img_src }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}" /></a> <span class="url"><a href="{{ result.url }}" class="small_font">original context</a></span> </p> </div> diff --git a/searx/templates/default/result_templates/map.html b/searx/templates/default/result_templates/map.html index 78221aa01..dccec7a53 100644 --- a/searx/templates/default/result_templates/map.html +++ b/searx/templates/default/result_templates/map.html @@ -1,7 +1,7 @@ <div class="result {{ result.class }}"> {% if result['favicon'] %} - <img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" /> + <img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}" /> {% endif %} <div> diff --git a/searx/templates/default/result_templates/torrent.html b/searx/templates/default/result_templates/torrent.html index 6c62793a5..5925f1313 100644 --- a/searx/templates/default/result_templates/torrent.html +++ b/searx/templates/default/result_templates/torrent.html @@ -1,7 +1,6 @@ <div class="result torrent_result"> <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> - {% if result.content %}<p class="content">{{ result.content|safe }}</p>{% endif %} - <p class="stats">Seed: {{ result.seed }}, Leech: {{ result.leech }}</p> - <p><a href="{{ result.magnetlink }}" class="magnetlink">magnet link</a></p> <p class="url">{{ result.pretty_url }}</p> + {% if result.content %}<p class="content">{{ result.content|safe }}</p>{% endif %} + <p><a href="{{ result.magnetlink }}" class="magnetlink">magnet link</a> - <span class="stats">Seed: {{ result.seed }}, Leech: {{ result.leech }}</span></p> </div> diff --git a/searx/templates/default/result_templates/videos.html b/searx/templates/default/result_templates/videos.html index 233a6c021..0a89a67aa 100644 --- a/searx/templates/default/result_templates/videos.html +++ b/searx/templates/default/result_templates/videos.html @@ -1,8 +1,6 @@ <div class="result"> - <p> - <h3 class="result_title"> {% if result['favicon'] %}<img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" />{% endif %}<a href="{{ result.url }}">{{ result.title|safe }}</a></h3> + <h3 class="result_title"> {% if result['favicon'] %}<img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}" />{% endif %}<a href="{{ result.url }}">{{ result.title|safe }}</a></h3> {% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %} - <a href="{{ result.url }}"><img class="thumbnail" src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a> + <a href="{{ result.url }}"><img class="thumbnail" src="{{ result.thumbnail }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> <p class="url">{{ result.url }}</p> - </p> </div> diff --git a/searx/templates/default/results.html b/searx/templates/default/results.html index 541983532..199eb1d96 100644 --- a/searx/templates/default/results.html +++ b/searx/templates/default/results.html @@ -10,7 +10,7 @@ <div id="search_url"> {{ _('Search URL') }}: - <input type="text" value="{{ base_url }}?q={{ q|urlencode }}&pageno={{ pageno }}{% if selected_categories %}&category_{{ selected_categories|join("&category_") }}{% endif %}" readonly="" /> + <input type="text" value="{{ base_url }}?q={{ q|urlencode }}&pageno={{ pageno }}{% if selected_categories %}&category_{{ selected_categories|join("&category_")|replace(' ','+') }}{% endif %}" readonly /> </div> <div id="apis"> {{ _('Download results') }} diff --git a/searx/tests/test_webapp.py b/searx/tests/test_webapp.py index 7771567f0..b922a3675 100644 --- a/searx/tests/test_webapp.py +++ b/searx/tests/test_webapp.py @@ -49,7 +49,7 @@ class ViewsTestCase(SearxTestCase): ) result = self.app.post('/', data={'q': 'test'}) self.assertIn( - '<h3 class="result_title"> <img width="14" height="14" class="favicon" src="static/default/img/icon_youtube.ico" /><a href="http://first.test.xyz">First <span class="highlight">Test</span></a></h3>', # noqa + '<h3 class="result_title"> <img width="14" height="14" class="favicon" src="static/default/img/icon_youtube.ico" alt="youtube" /><a href="http://first.test.xyz">First <span class="highlight">Test</span></a></h3>', # noqa result.data ) self.assertIn( |