diff options
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/engines/ahmia.py | 4 |
| -rw-r--r-- | searx/engines/mongodb.py | 65 |
| -rw-r--r-- | searx/engines/not_evil.py | 67 |
| -rw-r--r-- | searx/settings.yml | 20 |
4 files changed, 80 insertions, 76 deletions
diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py index 6c502bb40..b9a0086bd 100644 --- a/searx/engines/ahmia.py +++ b/searx/engines/ahmia.py @@ -9,7 +9,7 @@ from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval # about about = { - "website": 'http://msydqstlz2kzerdg.onion', + "website": 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion', "wikidata_id": 'Q18693938', "official_api_documentation": None, "use_official_api": False, @@ -23,7 +23,7 @@ paging = True page_size = 10 # search url -search_url = 'http://msydqstlz2kzerdg.onion/search/?{query}' +search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}' time_range_support = True time_range_dict = {'day': 1, 'week': 7, diff --git a/searx/engines/mongodb.py b/searx/engines/mongodb.py new file mode 100644 index 000000000..1f24c5acf --- /dev/null +++ b/searx/engines/mongodb.py @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pylint: disable=missing-function-docstring +"""MongoDB engine (Offline) + +""" + +import re +from pymongo import MongoClient # pylint: disable=import-error + +engine_type = 'offline' + +# mongodb connection variables +host = '127.0.0.1' +port = 27017 +username = '' +password = '' +database = None +collection = None +key = None + +# engine specific variables +paging = True +results_per_page = 20 +exact_match_only = False +result_template = 'key-value.html' + +_client = None + +def init(_): + connect() + +def connect(): + global _client # pylint: disable=global-statement + kwargs = { 'port': port } + if username: + kwargs['username'] = username + if password: + kwargs['password'] = password + _client = MongoClient(host, **kwargs)[database][collection] + +def search(query, params): + results = [] + if exact_match_only: + q = { '$eq': query } + else: + _re = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M ) + q = { '$regex': _re } + + query = _client.find( 
+ {key: q} + ).skip( + ( params['pageno'] -1 ) * results_per_page + ).limit( + results_per_page + ) + + results.append({ 'number_of_results': query.count() }) + for r in query: + del r['_id'] + r = { str(k):str(v) for k,v in r.items() } + r['template'] = result_template + results.append(r) + + return results diff --git a/searx/engines/not_evil.py b/searx/engines/not_evil.py deleted file mode 100644 index df41c0941..000000000 --- a/searx/engines/not_evil.py +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -""" - not Evil (Onions) -""" - -from urllib.parse import urlencode -from lxml import html -from searx.engines.xpath import extract_text - -# about -about = { - "website": 'http://hss3uro2hsxfogfq.onion', - "wikidata_id": None, - "official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm', - "use_official_api": False, - "require_api_key": False, - "results": 'HTML', -} - -# engine dependent config -categories = ['onions'] -paging = True -page_size = 20 - -# search-url -base_url = 'http://hss3uro2hsxfogfq.onion/' -search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}' - -# specific xpath variables -results_xpath = '//*[@id="content"]/div/p' -url_xpath = './span[1]' -title_xpath = './a[1]' -content_xpath = './text()' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * page_size - - params['url'] = base_url + search_url.format(pageno=offset, - query=urlencode({'q': query}), - page_size=page_size) - - return params - - -# get response from search-request -def response(resp): - results = [] - - # needed because otherwise requests guesses wrong encoding - resp.encoding = 'utf8' - dom = html.fromstring(resp.text) - - # parse results - for result in dom.xpath(results_xpath): - url = extract_text(result.xpath(url_xpath)[0]) - title = extract_text(result.xpath(title_xpath)[0]) - content = extract_text(result.xpath(content_xpath)) - - # append result - results.append({'url': 
url, - 'title': title, - 'content': content, - 'is_onion': True}) - - return results diff --git a/searx/settings.yml b/searx/settings.yml index e12a39c1a..faadb36d1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -811,6 +811,19 @@ engines: engine: mixcloud shortcut: mc + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + - name: npm engine: json_engine paging: true @@ -832,13 +845,6 @@ engines: require_api_key: false results: JSON - # Requires Tor - - name: not evil - engine: not_evil - categories: onions - enable_http: true - shortcut: ne - - name: nyaa engine: nyaa shortcut: nt |