-rw-r--r--   .gitignore                                              1
-rw-r--r--   searx/engines/__init__.py                              14
-rw-r--r--   searx/engines/ahmia.py                                 82
-rw-r--r--   searx/engines/not_evil.py                              64
-rw-r--r--   searx/engines/xpath.py                                 36
-rw-r--r--   searx/settings.yml                                     31
-rw-r--r--   searx/templates/legacy/result_templates/default.html    7
-rw-r--r--   searx/templates/oscar/macros.html                      12
-rwxr-xr-x   searx/webapp.py                                         1
-rw-r--r--   tests/unit/engines/test_xpath.py                      121
-rw-r--r--   tests/unit/test_engines_init.py                        44
11 files changed, 399 insertions, 14 deletions
diff --git a/.gitignore b/.gitignore
index e56a575ab..b1286ea66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ setup.cfg
 *.pyc
 */*.pyc
 *~
+*.swp
 /node_modules
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 9cdca47b7..00be89412 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -142,6 +142,17 @@ def load_engine(engine_data):
     engine.stats['page_load_time'] = 0
     engine.stats['page_load_count'] = 0
 
+    # Tor-related settings
+    if settings['outgoing'].get('using_tor_proxy'):
+        # use the onion URL if Tor is enabled
+        if hasattr(engine, 'onion_url'):
+            engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
+    elif 'onions' in engine.categories:
+        # exclude onion engines if Tor is not enabled
+        return None
+
+    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)
+
     for category_name in engine.categories:
         categories.setdefault(category_name, []).append(engine)
@@ -252,8 +263,9 @@ def get_engines_stats(preferences):
 
 def load_engines(engine_list):
-    global engines
+    global engines, engine_shortcuts
     engines.clear()
+    engine_shortcuts.clear()
     for engine_data in engine_list:
         engine = load_engine(engine_data)
         if engine is not None:
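
For illustration, here is a minimal standalone sketch of the Tor gating added above. The StubEngine class, the settings dict literal, and the apply_tor_settings name are invented for this example; searx's real loader applies the same logic to engine modules inside load_engine:

    # hypothetical stand-ins; only the gating logic mirrors the hunk above
    settings = {'outgoing': {'using_tor_proxy': True, 'extra_proxy_timeout': 10.0}}

    class StubEngine:
        categories = ['onions']
        onion_url = 'http://example.onion'
        search_path = '/search?q={query}'
        search_url = 'https://example.com/search?q={query}'
        timeout = 3.0

    def apply_tor_settings(engine):
        if settings['outgoing'].get('using_tor_proxy'):
            # prefer the hidden-service endpoint when routing through Tor
            if hasattr(engine, 'onion_url'):
                engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
        elif 'onions' in engine.categories:
            return None  # onion-only engines are dropped when Tor is off
        engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)
        return engine

    engine = apply_tor_settings(StubEngine())
    assert engine.search_url == 'http://example.onion/search?q={query}'
    assert engine.timeout == 13.0
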
diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py
new file mode 100644
index 000000000..d9fcc6ca7
--- /dev/null
+++ b/searx/engines/ahmia.py
@@ -0,0 +1,82 @@
+"""
+ Ahmia (Onions)
+
+ @website     http://msydqstlz2kzerdg.onion
+ @provide-api no
+
+ @using-api   no
+ @results     HTML
+ @stable      no
+ @parse       url, title, content
+"""
+
+from urllib.parse import urlencode, urlparse, parse_qs
+from lxml.html import fromstring
+from searx.engines.xpath import extract_url, extract_text
+
+# engine config
+categories = ['onions']
+paging = True
+page_size = 10
+
+# search url
+search_url = 'http://msydqstlz2kzerdg.onion/search/?{query}'
+time_range_support = True
+time_range_dict = {'day': 1,
+                   'week': 7,
+                   'month': 30}
+
+# xpaths
+results_xpath = '//li[@class="result"]'
+url_xpath = './h4/a/@href'
+title_xpath = './h4/a[1]'
+content_xpath = './/p[1]'
+correction_xpath = '//*[@id="didYouMean"]//a'
+number_of_results_xpath = '//*[@id="totalResults"]'
+
+
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}))
+
+    if params['time_range'] in time_range_dict:
+        params['url'] += '&' + urlencode({'d': time_range_dict[params['time_range']]})
+
+    return params
+
+
+def response(resp):
+    results = []
+    dom = fromstring(resp.text)
+
+    # trim the results so we don't return far too many at once
+    first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1)
+    all_results = dom.xpath(results_xpath)
+    trimmed_results = all_results[first_result_index:first_result_index + page_size]
+
+    # get results
+    for result in trimmed_results:
+        # strip Ahmia's redirect wrapper and extract the actual result URL
+        raw_url = extract_url(result.xpath(url_xpath), search_url)
+        cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]
+
+        title = extract_text(result.xpath(title_xpath))
+        content = extract_text(result.xpath(content_xpath))
+
+        results.append({'url': cleaned_url,
+                        'title': title,
+                        'content': content,
+                        'is_onion': True})
+
+    # get spelling corrections
+    for correction in dom.xpath(correction_xpath):
+        results.append({'correction': extract_text(correction)})
+
+    # get number of results
+    number_of_results = dom.xpath(number_of_results_xpath)
+    if number_of_results:
+        try:
+            results.append({'number_of_results': int(extract_text(number_of_results))})
+        except ValueError:
+            pass
+
+    return results
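
Ahmia wraps each result link in a redirect whose real target sits in a redirect_url query parameter, which is what the parse_qs call above unwraps. A self-contained sketch (the sample link is hypothetical but mirrors the shape of those redirects):

    from urllib.parse import urlparse, parse_qs

    raw_url = ('http://msydqstlz2kzerdg.onion/search/redirect?'
               'search_term=example&redirect_url=http://example.onion/page')
    cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]
    print(cleaned_url)  # -> http://example.onion/page
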
diff --git a/searx/engines/not_evil.py b/searx/engines/not_evil.py
new file mode 100644
index 000000000..e84f153bd
--- /dev/null
+++ b/searx/engines/not_evil.py
@@ -0,0 +1,64 @@
+"""
+ not Evil (Onions)
+
+ @website     http://hss3uro2hsxfogfq.onion
+ @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)
+
+ @using-api   no
+ @results     HTML
+ @stable      no
+ @parse       url, title, content
+"""
+
+from urllib.parse import urlencode
+from lxml import html
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['onions']
+paging = True
+page_size = 20
+
+# search-url
+base_url = 'http://hss3uro2hsxfogfq.onion/'
+search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'
+
+# specific xpath variables
+results_xpath = '//*[@id="content"]/div/p'
+url_xpath = './span[1]'
+title_xpath = './a[1]'
+content_xpath = './text()'
+
+
+# do search-request
+def request(query, params):
+    offset = (params['pageno'] - 1) * page_size
+
+    params['url'] = base_url + search_url.format(pageno=offset,
+                                                 query=urlencode({'q': query}),
+                                                 page_size=page_size)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    # needed because otherwise requests guesses the wrong encoding
+    resp.encoding = 'utf8'
+    dom = html.fromstring(resp.text)
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        url = extract_text(result.xpath(url_xpath)[0])
+        title = extract_text(result.xpath(title_xpath)[0])
+        content = extract_text(result.xpath(content_xpath))
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'content': content,
+                        'is_onion': True})
+
+    return results
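
Note that not Evil's {pageno} slot is filled with a row offset rather than a page number. A quick sketch of the URL that request() above would build for page 2 (the query string 'test' is chosen purely for illustration):

    from urllib.parse import urlencode

    base_url = 'http://hss3uro2hsxfogfq.onion/'
    search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'
    page_size = 20

    offset = (2 - 1) * page_size  # pageno 2 -> start row 20
    url = base_url + search_url.format(pageno=offset,
                                       query=urlencode({'q': 'test'}),
                                       page_size=page_size)
    print(url)
    # -> http://hss3uro2hsxfogfq.onion/index.php?q=test&hostLimit=20&start=20&numRows=20
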
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index a269253d7..81c2747fb 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -10,6 +10,8 @@ thumbnail_xpath = False
 paging = False
 suggestion_xpath = ''
 results_xpath = ''
+cached_xpath = ''
+cached_url = ''
 
 # parameters for engines with paging support
 #
@@ -36,6 +38,8 @@ def request(query, params):
 
 def response(resp):
     results = []
     dom = html.fromstring(resp.text)
+    is_onion = 'onions' in categories
+
     if results_xpath:
         for result in eval_xpath(dom, results_xpath):
             url = extract_url(eval_xpath(result, url_xpath), search_url)
@@ -49,15 +53,33 @@ def response(resp):
             if len(thumbnail_xpath_result) > 0:
                 tmp_result['img_src'] = extract_url(thumbnail_xpath_result, search_url)
 
+            # add an alternative cached URL if available
+            if cached_xpath:
+                tmp_result['cached_url'] = cached_url + extract_text(result.xpath(cached_xpath))
+
+            if is_onion:
+                tmp_result['is_onion'] = True
+
             results.append(tmp_result)
     else:
-        for url, title, content in zip(
-            (extract_url(x, search_url) for
-             x in eval_xpath(dom, url_xpath)),
-            map(extract_text, eval_xpath(dom, title_xpath)),
-            map(extract_text, eval_xpath(dom, content_xpath))
-        ):
-            results.append({'url': url, 'title': title, 'content': content})
+        if cached_xpath:
+            for url, title, content, cached in zip(
+                (extract_url(x, search_url) for
+                 x in dom.xpath(url_xpath)),
+                map(extract_text, dom.xpath(title_xpath)),
+                map(extract_text, dom.xpath(content_xpath)),
+                map(extract_text, dom.xpath(cached_xpath))
+            ):
+                results.append({'url': url, 'title': title, 'content': content,
+                                'cached_url': cached_url + cached, 'is_onion': is_onion})
+        else:
+            for url, title, content in zip(
+                (extract_url(x, search_url) for
+                 x in dom.xpath(url_xpath)),
+                map(extract_text, dom.xpath(title_xpath)),
+                map(extract_text, dom.xpath(content_xpath))
+            ):
+                results.append({'url': url, 'title': title, 'content': content, 'is_onion': is_onion})
 
     if not suggestion_xpath:
         return results
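
A rough usage sketch for the new cached_xpath/cached_url knobs, configuring the xpath module directly the way the unit tests below do; the HTML snippet and selectors are made up for illustration:

    from unittest import mock
    from searx.engines import xpath

    xpath.search_url = 'https://example.com/?q={query}'
    xpath.results_xpath = ''
    xpath.url_xpath = '//a[@class="r"]/@href'
    xpath.title_xpath = '//a[@class="r"]'
    xpath.content_xpath = '//p[@class="c"]'
    xpath.cached_xpath = '//a[@class="cache"]/@href'
    xpath.cached_url = 'http://cache.example.onion/'  # prefix for extracted values
    xpath.categories = ['onions']                     # marks results with is_onion

    html = ('<div><a class="r" href="https://a.example/">A</a>'
            '<p class="c">text</p><a class="cache" href="x">x</a></div>')
    results = xpath.response(mock.Mock(text=html))
    print(results[0]['cached_url'])  # -> http://cache.example.onion/x
    print(results[0]['is_onion'])    # -> True
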
diff --git a/searx/settings.yml b/searx/settings.yml
index b23f48b45..54352bbfc 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -60,8 +60,10 @@ outgoing: # communication with search engines
 #    see http://docs.python-requests.org/en/latest/user/advanced/#proxies
 #    SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks
 #    proxies :
-#        http : http://127.0.0.1:8080
-#        https: http://127.0.0.1:8080
+#        http : socks5h://127.0.0.1:9050
+#        https: socks5h://127.0.0.1:9050
+#    using_tor_proxy : True
+#    extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy
 # uncomment below section only if you have more than one network interface
 # which can be the source of outgoing search requests
 #  source_ips:
@@ -89,6 +91,12 @@ engines:
     shortcut: apkm
     disabled: True
 
+# Requires Tor
+  - name : ahmia
+    engine : ahmia
+    categories : onions
+    shortcut : ah
+
   - name : arch linux wiki
     engine : archlinux
     shortcut : al
@@ -185,7 +193,7 @@ engines:
   - name : deviantart
     engine : deviantart
     shortcut : da
-    timeout: 3.0
+    timeout : 3.0
 
   - name : ddg definitions
     engine : duckduckgo_definitions
@@ -514,6 +522,11 @@ engines:
     timeout: 5.0
     shortcut : npm
 
+# Requires Tor
+  - name : not evil
+    engine : not_evil
+    shortcut : ne
+
   - name : nyaa
     engine : nyaa
     shortcut : nt
@@ -698,6 +711,18 @@ engines:
     url: https://torrentz2.eu/
     timeout : 3.0
 
+# Requires Tor
+  - name : torch
+    engine : xpath
+    paging : True
+    search_url : http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and
+    results_xpath : //table//tr
+    url_xpath : ./td[2]/a
+    title_xpath : ./td[2]/b
+    content_xpath : ./td[2]/small
+    categories : onions
+    shortcut : tch
+
   - name : twitter
     engine : twitter
     shortcut : tw
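
A quick way to sanity-check the proxy settings above from Python, assuming a local Tor daemon on port 9050 (requires the PySocks extra, e.g. pip install "requests[socks]"; the onion address is Ahmia's, reused from the engine file above):

    import requests

    proxies = {
        'http': 'socks5h://127.0.0.1:9050',   # socks5h resolves hostnames inside Tor,
        'https': 'socks5h://127.0.0.1:9050',  # which .onion addresses require
    }
    resp = requests.get('http://msydqstlz2kzerdg.onion/', proxies=proxies, timeout=10.0)
    print(resp.status_code)
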
diff --git a/searx/templates/legacy/result_templates/default.html b/searx/templates/legacy/result_templates/default.html
index 13e2d2913..78bf031df 100644
--- a/searx/templates/legacy/result_templates/default.html
+++ b/searx/templates/legacy/result_templates/default.html
@@ -1,6 +1,11 @@
 <div class="result {{ result.class }}{% for e in result.engines %} {{ e }}{% endfor %}">
     <h3 class="result_title">{% if "icon_"~result.engine~".ico" in favicons %}<img width="14" height="14" class="favicon" src="{{ url_for('static', filename='img/icons/icon_'+result.engine+'.ico') }}" alt="{{result.engine}}" />{% endif %}<a href="{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ result.title|safe }}</a></h3>
-    <p class="url">{{ result.pretty_url }}&lrm; <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('cached') }}</a>
+    <p class="url">{{ result.pretty_url }}&lrm;
+    {% if result.cached_url %}
+    <a class="cache_link" href="{{ result.cached_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('cached') }}</a>
+    {% elif not result.is_onion %}
+    <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('cached') }}</a>
+    {% endif %}
     {% if result.publishedDate %}<span class="published_date">{{ result.publishedDate }}</span>{% endif %}</p>
     <p class="content">{% if result.img_src %}<img src="{{ image_proxify(result.img_src) }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p>
 </div>
diff --git a/searx/templates/oscar/macros.html b/searx/templates/oscar/macros.html
index f52d9713c..57a90aaa2 100644
--- a/searx/templates/oscar/macros.html
+++ b/searx/templates/oscar/macros.html
@@ -32,7 +32,11 @@
         <span class="label label-default">{{ engine }}</span>
     {%- endfor -%}
     {%- if result.url -%}
-        <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% if result.cached_url %}
+            <small>{{ result_link(result.cached_url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% elif not result.is_onion %}
+            <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% endif %}
     {%- endif -%}
     {%- if proxify -%}
         <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info", id) }}</small>
@@ -50,7 +54,11 @@
         <span class="label label-default">{{ engine }}</span>
     {%- endfor %}
     {%- if result.url -%}
-        <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% if result.cached_url %}
+            <small>{{ result_link(result.cached_url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% elif not result.is_onion %}
+            <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% endif %}
     {%- endif -%}
     {% if proxify -%}
         <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info", id) }}</small>
diff --git a/searx/webapp.py b/searx/webapp.py
index cf9a09778..609669b85 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -146,6 +146,7 @@ _category_names = (gettext('files'),
                    gettext('it'),
                    gettext('news'),
                    gettext('map'),
+                   gettext('onions'),
                    gettext('science'))
 
 outgoing_proxies = settings['outgoing'].get('proxies') or None
diff --git a/tests/unit/engines/test_xpath.py b/tests/unit/engines/test_xpath.py
new file mode 100644
index 000000000..963a44a25
--- /dev/null
+++ b/tests/unit/engines/test_xpath.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import xpath
+from searx.testing import SearxTestCase
+
+
+class TestXpathEngine(SearxTestCase):
+
+    def test_request(self):
+        xpath.search_url = 'https://url.com/{query}'
+        xpath.categories = []
+        xpath.paging = False
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        params = xpath.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertEqual('https://url.com/test_query', params['url'])
+
+        xpath.search_url = 'https://url.com/q={query}&p={pageno}'
+        xpath.paging = True
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['pageno'] = 1
+        params = xpath.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertEqual('https://url.com/q=test_query&p=1', params['url'])
+
+    def test_response(self):
+        # without results_xpath
+        xpath.url_xpath = '//div[@class="search_result"]//a[@class="result"]/@href'
+        xpath.title_xpath = '//div[@class="search_result"]//a[@class="result"]'
+        xpath.content_xpath = '//div[@class="search_result"]//p[@class="content"]'
+
+        self.assertRaises(AttributeError, xpath.response, None)
+        self.assertRaises(AttributeError, xpath.response, [])
+        self.assertRaises(AttributeError, xpath.response, '')
+        self.assertRaises(AttributeError, xpath.response, '[]')
+
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(xpath.response(response), [])
+
+        html = u"""
+        <div>
+            <div class="search_result">
+                <a class="result" href="https://result1.com">Result 1</a>
+                <p class="content">Content 1</p>
+                <a class="cached" href="https://cachedresult1.com">Cache</a>
+            </div>
+            <div class="search_result">
+                <a class="result" href="https://result2.com">Result 2</a>
+                <p class="content">Content 2</p>
+                <a class="cached" href="https://cachedresult2.com">Cache</a>
+            </div>
+        </div>
+        """
+        response = mock.Mock(text=html)
+        results = xpath.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['title'], 'Result 1')
+        self.assertEqual(results[0]['url'], 'https://result1.com/')
+        self.assertEqual(results[0]['content'], 'Content 1')
+        self.assertEqual(results[1]['title'], 'Result 2')
+        self.assertEqual(results[1]['url'], 'https://result2.com/')
+        self.assertEqual(results[1]['content'], 'Content 2')
+
+        # with cached urls, without results_xpath
+        xpath.cached_xpath = '//div[@class="search_result"]//a[@class="cached"]/@href'
+        results = xpath.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['cached_url'], 'https://cachedresult1.com')
+        self.assertEqual(results[1]['cached_url'], 'https://cachedresult2.com')
+        self.assertFalse(results[0].get('is_onion', False))
+
+        # results are onion urls (no results_xpath)
+        xpath.categories = ['onions']
+        results = xpath.response(response)
+        self.assertTrue(results[0]['is_onion'])
+
+        # with results_xpath
+        xpath.results_xpath = '//div[@class="search_result"]'
+        xpath.url_xpath = './/a[@class="result"]/@href'
+        xpath.title_xpath = './/a[@class="result"]'
+        xpath.content_xpath = './/p[@class="content"]'
+        xpath.cached_xpath = None
+        xpath.categories = []
+
+        self.assertRaises(AttributeError, xpath.response, None)
+        self.assertRaises(AttributeError, xpath.response, [])
+        self.assertRaises(AttributeError, xpath.response, '')
+        self.assertRaises(AttributeError, xpath.response, '[]')
+
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(xpath.response(response), [])
+
+        response = mock.Mock(text=html)
+        results = xpath.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['title'], 'Result 1')
+        self.assertEqual(results[0]['url'], 'https://result1.com/')
+        self.assertEqual(results[0]['content'], 'Content 1')
+        self.assertEqual(results[1]['title'], 'Result 2')
+        self.assertEqual(results[1]['url'], 'https://result2.com/')
+        self.assertEqual(results[1]['content'], 'Content 2')
+
+        # with cached urls, with results_xpath
+        xpath.cached_xpath = './/a[@class="cached"]/@href'
+        results = xpath.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['cached_url'], 'https://cachedresult1.com')
+        self.assertEqual(results[1]['cached_url'], 'https://cachedresult2.com')
+        self.assertFalse(results[0].get('is_onion', False))
+
+        # results are onion urls (with results_xpath)
+        xpath.categories = ['onions']
+        results = xpath.response(response)
+        self.assertTrue(results[0]['is_onion'])
diff --git a/tests/unit/test_engines_init.py b/tests/unit/test_engines_init.py
new file mode 100644
index 000000000..cf4d50309
--- /dev/null
+++ b/tests/unit/test_engines_init.py
@@ -0,0 +1,44 @@
+from searx.testing import SearxTestCase
+from searx import settings, engines
+
+
+class TestEnginesInit(SearxTestCase):
+
+    @classmethod
+    def tearDownClass(cls):
+        settings['outgoing']['using_tor_proxy'] = False
+        settings['outgoing']['extra_proxy_timeout'] = 0
+
+    def test_initialize_engines_default(self):
+        engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'},
+                       {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2'}]
+
+        engines.initialize_engines(engine_list)
+        self.assertEqual(len(engines.engines), 2)
+        self.assertIn('engine1', engines.engines)
+        self.assertIn('engine2', engines.engines)
+
+    def test_initialize_engines_exclude_onions(self):
+        settings['outgoing']['using_tor_proxy'] = False
+        engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'},
+                       {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}]
+
+        engines.initialize_engines(engine_list)
+        self.assertEqual(len(engines.engines), 1)
+        self.assertIn('engine1', engines.engines)
+        self.assertNotIn('onions', engines.categories)
+
+    def test_initialize_engines_include_onions(self):
+        settings['outgoing']['using_tor_proxy'] = True
+        settings['outgoing']['extra_proxy_timeout'] = 100.0
+        engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general',
+                        'timeout': 20.0, 'onion_url': 'http://engine1.onion'},
+                       {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}]
+
+        engines.initialize_engines(engine_list)
+        self.assertEqual(len(engines.engines), 2)
+        self.assertIn('engine1', engines.engines)
+        self.assertIn('engine2', engines.engines)
+        self.assertIn('onions', engines.categories)
+        self.assertIn('http://engine1.onion', engines.engines['engine1'].search_url)
+        self.assertEqual(engines.engines['engine1'].timeout, 120.0)
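
These tests load searx's bundled 'dummy' engine; a stand-in with the same shape looks roughly like this (two no-op hooks, so only the loader's behaviour gets exercised):

    # rough sketch of a dummy engine module such as searx/engines/dummy.py
    def request(query, params):
        # pass the request parameters through unchanged
        return params

    def response(resp):
        # return no results
        return []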