From c3daa08537668c24224fffecbed4347fee936fcf Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Thu, 19 May 2016 00:38:43 -0500
Subject: [enh] Add onions category with Ahmia, Not Evil and Torch

Xpath engine and results template changed to account for the fact that
archive.org doesn't cache .onions, though some onion engines might have
their own cache.

Disabled by default. Can be enabled by pointing the SOCKS proxies at
wherever Tor is listening and setting using_tor_proxy to True.

Requires Tor and updating packages.

To avoid manually adding the timeout on each engine, you can set
extra_proxy_timeout to account for Tor's (or whatever proxy used) extra
time.
---
 searx/engines/xpath.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

(limited to 'searx/engines/xpath.py')

diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index a269253d7..81c2747fb 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -10,6 +10,8 @@ thumbnail_xpath = False
 paging = False
 suggestion_xpath = ''
 results_xpath = ''
+cached_xpath = ''
+cached_url = ''
 
 # parameters for engines with paging support
 #
@@ -36,6 +38,8 @@ def request(query, params):
 def response(resp):
     results = []
     dom = html.fromstring(resp.text)
+    is_onion = True if 'onions' in categories else False
+
     if results_xpath:
         for result in eval_xpath(dom, results_xpath):
             url = extract_url(eval_xpath(result, url_xpath), search_url)
@@ -49,15 +53,33 @@ def response(resp):
                 if len(thumbnail_xpath_result) > 0:
                     tmp_result['img_src'] = extract_url(thumbnail_xpath_result, search_url)
 
+            # add alternative cached url if available
+            if cached_xpath:
+                tmp_result['cached_url'] = cached_url + extract_text(result.xpath(cached_xpath))
+
+            if is_onion:
+                tmp_result['is_onion'] = True
+
             results.append(tmp_result)
     else:
-        for url, title, content in zip(
-            (extract_url(x, search_url) for
-             x in eval_xpath(dom, url_xpath)),
-            map(extract_text, eval_xpath(dom, title_xpath)),
-            map(extract_text, eval_xpath(dom, content_xpath))
-        ):
-            results.append({'url': url, 'title': title, 'content': content})
+        if cached_xpath:
+            for url, title, content, cached in zip(
+                (extract_url(x, search_url) for
+                 x in dom.xpath(url_xpath)),
+                map(extract_text, dom.xpath(title_xpath)),
+                map(extract_text, dom.xpath(content_xpath)),
+                map(extract_text, dom.xpath(cached_xpath))
+            ):
+                results.append({'url': url, 'title': title, 'content': content,
+                                'cached_url': cached_url + cached, 'is_onion': is_onion})
+        else:
+            for url, title, content in zip(
+                (extract_url(x, search_url) for
+                 x in dom.xpath(url_xpath)),
+                map(extract_text, dom.xpath(title_xpath)),
+                map(extract_text, dom.xpath(content_xpath))
+            ):
+                results.append({'url': url, 'title': title, 'content': content, 'is_onion': is_onion})
 
     if not suggestion_xpath:
         return results
-- 
cgit v1.2.3