diff options
| author | a01200356 <a01200356@itesm.mx> | 2016-05-19 00:38:43 -0500 |
|---|---|---|
| committer | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2020-10-25 17:59:05 -0700 |
| commit | c3daa08537668c24224fffecbed4347fee936fcf (patch) | |
| tree | 23cfde77015e13e8687bf08bee9d5a4271b7af7f /searx/engines/not_evil.py | |
| parent | 0a44fa8bb7eca0d81f0ebdab37b9845b260473ad (diff) | |
[enh] Add onions category with Ahmia, Not Evil and Torch
Xpath engine and results template changed to account for the fact that
archive.org doesn't cache .onions, though some onion engines might have
their own cache.
Disabled by default. Can be enabled by setting the SOCKS proxies to
wherever Tor is listening and setting using_tor_proxy to True.
Requires Tor and updating packages.
To avoid manually adding the timeout on each engine, you can set
extra_proxy_timeout to account for Tor's (or whatever proxy used) extra
time.
Diffstat (limited to 'searx/engines/not_evil.py')
| -rw-r--r-- | searx/engines/not_evil.py | 64 |
1 file changed, 64 insertions, 0 deletions
"""
 not Evil (Onions)

 @website http://hss3uro2hsxfogfq.onion
 @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)

 @using-api no
 @results HTML
 @stable no
 @parse url, title, content
"""

from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['onions']
paging = True
page_size = 20

# search-url
base_url = 'http://hss3uro2hsxfogfq.onion/'
# hostLimit mirrors page_size so the per-host cap stays in step with paging
# (previously hard-coded to 20, which silently diverged if page_size changed)
search_url = 'index.php?{query}&hostLimit={page_size}&start={pageno}&numRows={page_size}'

# specific xpath variables
results_xpath = '//*[@id="content"]/div/p'
url_xpath = './span[1]'
title_xpath = './a[1]'
content_xpath = './text()'


def request(query, params):
    """Build the search request.

    Sets params['url'] to the engine query URL and returns params.
    The engine paginates by row offset, so searx's 1-based page number
    is converted to a 0-based row index.
    """
    offset = (params['pageno'] - 1) * page_size

    params['url'] = base_url + search_url.format(pageno=offset,
                                                 query=urlencode({'q': query}),
                                                 page_size=page_size)

    return params


def response(resp):
    """Parse the HTML response into a list of searx result dicts.

    Each result dict carries url, title, content and is_onion=True so
    templates do not link to archive.org's cache (it has no .onions).
    """
    results = []

    # needed because otherwise requests guesses wrong encoding
    resp.encoding = 'utf8'
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        url_nodes = result.xpath(url_xpath)
        title_nodes = result.xpath(title_xpath)
        # skip malformed entries instead of raising IndexError on [0]
        if not url_nodes or not title_nodes:
            continue

        # append result
        results.append({'url': extract_text(url_nodes[0]),
                        'title': extract_text(title_nodes[0]),
                        'content': extract_text(result.xpath(content_xpath)),
                        'is_onion': True})

    return results