diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2020-10-26 14:20:58 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-10-26 14:20:58 +0100 |
| commit | db703a0283ee169381aeea97c678e666ae508348 (patch) | |
| tree | 6b992653be4ab7905f2b7bf27d98d64cb15570fe /searx/engines/not_evil.py | |
| parent | 2aef38c3b9d1fe93e9d665a49b10151d63d92392 (diff) | |
| parent | 32957cdf49c306a5f50ca78bb50c0978ffe5c072 (diff) | |
Merge pull request #565 from MarcAbonce/onions
New category: Onions
Diffstat (limited to 'searx/engines/not_evil.py')
| -rw-r--r-- | searx/engines/not_evil.py | 64 |
1 files changed, 64 insertions, 0 deletions
# searx/engines/not_evil.py -- new file (mode 100644, index 000000000..e84f153bd)
"""
 not Evil (Onions)

 @website     http://hss3uro2hsxfogfq.onion
 @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)

 @using-api   no
 @results     HTML
 @stable      no
 @parse       url, title, content
"""

from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['onions']
paging = True
page_size = 20

# search-url
base_url = 'http://hss3uro2hsxfogfq.onion/'
# NOTE: despite its name, the ``{pageno}`` placeholder is filled with a row
# offset (see request()); the name is kept so the template stays compatible.
search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'

# specific xpath variables
results_xpath = '//*[@id="content"]/div/p'
url_xpath = './span[1]'
title_xpath = './a[1]'
content_xpath = './text()'


# do search-request
def request(query, params):
    """Fill ``params['url']`` with the search URL for ``query``.

    ``params['pageno']`` is converted to a row offset
    (``(pageno - 1) * page_size``), so page 1 starts at row 0.

    Returns the same ``params`` dict, updated in place.
    """
    offset = (params['pageno'] - 1) * page_size

    params['url'] = base_url + search_url.format(pageno=offset,
                                                 query=urlencode({'q': query}),
                                                 page_size=page_size)

    return params


# get response from search-request
def response(resp):
    """Parse the HTML in ``resp.text`` into a list of result dicts.

    Each result has the keys ``url``, ``title``, ``content`` and
    ``is_onion`` (always ``True``).  Paragraphs matched by ``results_xpath``
    that lack a URL ``<span>`` or a title ``<a>`` are skipped.
    """
    results = []

    # needed because otherwise requests guesses wrong encoding
    resp.encoding = 'utf8'
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        url_nodes = result.xpath(url_xpath)
        title_nodes = result.xpath(title_xpath)

        # results_xpath matches every <p> in the content div; a paragraph
        # without both elements is not a search hit, and indexing [0] on it
        # would raise IndexError and discard the whole page of results.
        if not url_nodes or not title_nodes:
            continue

        url = extract_text(url_nodes[0])
        title = extract_text(title_nodes[0])
        content = extract_text(result.xpath(content_xpath))

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'is_onion': True})

    return results