summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.pylintrc2
-rw-r--r--docs/admin/installation-searx.rst2
-rw-r--r--docs/dev/makefile.rst2
-rw-r--r--searx/engines/elasticsearch.py142
-rw-r--r--searx/engines/qwant.py2
-rw-r--r--searx/engines/xpath.py1
-rw-r--r--searx/search.py27
-rw-r--r--searx/settings.yml20
-rwxr-xr-xsearx/webapp.py33
-rw-r--r--tests/unit/test_search.py14
-rw-r--r--tests/unit/test_standalone_searx.py3
-rw-r--r--tests/unit/test_webapp.py25
12 files changed, 250 insertions, 23 deletions
diff --git a/.pylintrc b/.pylintrc
index 3b4adb2ca..eb6d500b3 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -12,7 +12,7 @@
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code
-extension-pkg-whitelist=
+extension-pkg-whitelist=lxml.etree
# Add files or directories to the blacklist. They should be base names, not
# paths.
diff --git a/docs/admin/installation-searx.rst b/docs/admin/installation-searx.rst
index f1d486021..a368bfe8c 100644
--- a/docs/admin/installation-searx.rst
+++ b/docs/admin/installation-searx.rst
@@ -52,7 +52,7 @@ In the same shell create *virtualenv*:
:end-before: END create virtualenv
To install searx's dependencies, exit the searx *bash* session you opened above
-and restart a new. Before install, first check if your *virualenv* was sourced
+and start a new one. Before installing, first check if your *virtualenv* was sourced
from the login (*~/.profile*):
.. kernel-include:: $DOCS_BUILD/includes/searx.rst
diff --git a/docs/dev/makefile.rst b/docs/dev/makefile.rst
index 62cd0a984..699729a28 100644
--- a/docs/dev/makefile.rst
+++ b/docs/dev/makefile.rst
@@ -68,7 +68,7 @@ Python environment
``source ./local/py3/bin/activate``
-With Makefile we do no longer need to build up the virualenv manually (as
+With the Makefile we no longer need to build up the virtualenv manually (as
described in the :ref:`devquickstart` guide). Jump into your git working tree
and release a ``make pyenv``:
diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py
new file mode 100644
index 000000000..bad65fb27
--- /dev/null
+++ b/searx/engines/elasticsearch.py
@@ -0,0 +1,142 @@
+from json import loads, dumps
+from lxml import html
+from urllib.parse import quote, urljoin
+from requests.auth import HTTPBasicAuth
+from searx.utils import extract_text, get_torrent_size
+
+
+# Engine settings.
+# NOTE(review): these look like defaults that are replaced from the engine's
+# entry in settings.yml after import -- confirm against the engine loader.
+base_url = 'http://localhost:9200'
+# request() only sends HTTP basic auth when both values are non-empty.
+username = ''
+password = ''
+# Name of the index to search; init() raises ValueError while this is empty.
+index = ''
+# NOTE: evaluated once, at import time, from the base_url and index above.
+search_url = base_url + '/' + index + '/_search'
+# Key into _available_query_types selecting the query builder used by request().
+query_type = 'match'
+# Query template read by the 'custom' query type.
+custom_query_json = {}
+# When true, response() adds index, id and score to every result as 'metadata'.
+show_metadata = False
+categories = ['general']
+
+
+def init(engine_settings):
+    """
+    Validate the engine configuration.
+
+    Raises ValueError when ``engine_settings`` names a query type that is not a
+    key of ``_available_query_types``, or when the module-level ``index`` is
+    an empty string.
+    """
+    if 'query_type' in engine_settings:
+        configured_type = engine_settings['query_type']
+        if configured_type not in _available_query_types:
+            raise ValueError('unsupported query type', configured_type)
+
+    if index == '':
+        raise ValueError('index cannot be empty')
+
+
+def request(query, params):
+    """
+    Fill ``params`` with the HTTP request for an Elasticsearch search.
+
+    ``query`` is the raw user query; it is turned into a JSON request body by
+    the builder registered for ``query_type`` in ``_available_query_types``.
+    ``params`` is returned untouched when ``query_type`` is not registered.
+    Builders raise ValueError when the query does not match their format.
+    """
+    if query_type not in _available_query_types:
+        return params
+
+    if username and password:
+        params['auth'] = HTTPBasicAuth(username, password)
+
+    # init() rejects an empty ``index``, so ``index`` is expected to be
+    # reassigned after this module has been imported.  The module-level
+    # ``search_url`` is computed at import time from the defaults and would not
+    # reflect that, so the URL is built here from the current values.
+    params['url'] = base_url.rstrip('/') + '/' + index + '/_search'
+    params['method'] = 'GET'
+    params['data'] = dumps(_available_query_types[query_type](query))
+    params['headers']['Content-Type'] = 'application/json'
+
+    return params
+
+
+def _match_query(query):
+    """
+    The standard for full text queries.
+    searx format: "key:value" e.g. city:berlin
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
+
+    Only the first colon separates the key from the value, so the value may
+    itself contain colons (e.g. url:http://example.org).
+    Raises ValueError when the query contains no colon.
+    """
+
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be "key:value"')
+
+    return {"query": {"match": {key: {'query': value}}}}
+
+
+def _simple_query_string_query(query):
+    """
+    Build a ``simple_query_string`` request body from the raw query.
+    It accepts query strings but is less strict than query_string; the field
+    searched can be specified in index.query.default_field in Elasticsearch.
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
+    """
+
+    clause = {'query': query}
+    return {'query': {'simple_query_string': clause}}
+
+
+def _term_query(query):
+    """
+    Accepts one term and the name of the field.
+    searx format: "key:value" e.g. city:berlin
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html
+
+    Only the first colon separates the key from the value, so the term may
+    itself contain colons.
+    Raises ValueError when the query contains no colon.
+    """
+
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value')
+
+    return {'query': {'term': {key: value}}}
+
+
+def _terms_query(query):
+    """
+    Accepts multiple terms and the name of the field.
+    searx format: "key:value1,value2" e.g. city:berlin,paris
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
+
+    Only the first colon separates the key from the comma-separated values.
+    Raises ValueError when the query contains no colon.
+    """
+
+    key, separator, values = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value1,value2')
+
+    return {'query': {'terms': {key: values.split(',')}}}
+
+
+def _custom_query(query):
+    """
+    Build the request body from the administrator supplied ``custom_query_json``.
+    searx format: "key:value" e.g. city:berlin
+
+    Every dictionary key equal to '{{KEY}}' is replaced by the key of the
+    query and every value equal to '{{VALUE}}' by its value, at any nesting
+    depth.  The template itself is never modified, so the placeholders are
+    still available for the next query.
+    Raises ValueError when the query contains no colon.
+    """
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value')
+
+    return _fill_placeholders(custom_query_json, key, value)
+
+
+def _fill_placeholders(template, key, value):
+    """Return a copy of ``template`` with the '{{KEY}}' / '{{VALUE}}' placeholders filled in."""
+    if isinstance(template, dict):
+        return {
+            (key if item_key == '{{KEY}}' else item_key): _fill_placeholders(item_value, key, value)
+            for item_key, item_value in template.items()
+        }
+    if isinstance(template, list):
+        return [_fill_placeholders(item, key, value) for item in template]
+    if template == '{{VALUE}}':
+        return value
+    return template
+
+
+def response(resp):
+    """
+    Convert an Elasticsearch search response into a list of key-value results.
+
+    ``resp.text`` is parsed as JSON.  An 'error' entry in the parsed body is
+    raised as an Exception.  Each hit becomes a dict of its ``_source`` fields:
+    values are converted with ``str`` unless the field name starts with an
+    underscore.  Every result uses the 'key-value.html' template and, when
+    ``show_metadata`` is true, carries the index, id and score of the hit.
+    """
+    payload = loads(resp.text)
+    if 'error' in payload:
+        raise Exception(payload['error'])
+
+    results = []
+    for hit in payload['hits']['hits']:
+        entry = {}
+        for field, content in hit['_source'].items():
+            entry[field] = content if field.startswith('_') else str(content)
+        entry['template'] = 'key-value.html'
+
+        if show_metadata:
+            entry['metadata'] = {
+                'index': hit['_index'],
+                'id': hit['_id'],
+                'score': hit['_score'],
+            }
+
+        results.append(entry)
+
+    return results
+
+
+# Maps each supported ``query_type`` name to the function that builds the
+# request body for it.  init() and request() both check ``query_type``
+# against the keys of this dict.
+_available_query_types = {
+    # Full text queries
+    # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
+    'match': _match_query,
+    'simple_query_string': _simple_query_string_query,
+
+    # Term-level queries
+    # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
+    'term': _term_query,
+    'terms': _terms_query,
+
+    # Query JSON defined by the instance administrator.
+    'custom': _custom_query,
+}
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 98460604c..c909ce11b 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -17,7 +17,7 @@ from searx.utils import html_to_text, match_language
# engine dependent config
-categories = None
+categories = []
paging = True
language_support = True
supported_languages_url = 'https://qwant.com/region'
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 81c2747fb..a569d9160 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -7,6 +7,7 @@ url_xpath = None
content_xpath = None
title_xpath = None
thumbnail_xpath = False
+categories = []
paging = False
suggestion_xpath = ''
results_xpath = ''
diff --git a/searx/search.py b/searx/search.py
index cd195825a..1cb2a603b 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -57,8 +57,11 @@ class EngineRef:
self.category = category
self.from_bang = from_bang
- def __str__(self):
- return "(" + self.name + "," + self.category + "," + str(self.from_bang) + ")"
+    def __repr__(self):
+        """Return a constructor-style representation: name, category, from_bang."""
+        fields = (self.name, self.category, self.from_bang)
+        return "EngineRef({!r}, {!r}, {!r})".format(*fields)
+
+    def __eq__(self, other):
+        """
+        Compare two engine references by name, category and from_bang.
+
+        Returns NotImplemented for operands that are not EngineRef instances,
+        so comparing against e.g. None yields False instead of raising
+        AttributeError.
+        """
+        if not isinstance(other, EngineRef):
+            return NotImplemented
+        return (self.name == other.name
+                and self.category == other.category
+                and self.from_bang == other.from_bang)
class SearchQuery:
@@ -87,8 +90,21 @@ class SearchQuery:
self.timeout_limit = timeout_limit
self.external_bang = external_bang
- def __str__(self):
- return self.query + ";" + str(self.engineref_list)
+    def __repr__(self):
+        """Return a constructor-style representation listing all nine query fields."""
+        fields = (
+            self.query,
+            self.engineref_list,
+            self.categories,
+            self.lang,
+            self.safesearch,
+            self.pageno,
+            self.time_range,
+            self.timeout_limit,
+            self.external_bang,
+        )
+        return "SearchQuery({})".format(", ".join(repr(field) for field in fields))
+
+    def __eq__(self, other):
+        """
+        Compare two search queries field by field.
+
+        Returns NotImplemented for operands that are not SearchQuery instances,
+        so comparing against e.g. None yields False instead of raising
+        AttributeError.
+        """
+        if not isinstance(other, SearchQuery):
+            return NotImplemented
+        # categories must be compared against ``other``; comparing the
+        # attribute with itself would make queries that differ only in their
+        # categories look equal
+        return self.query == other.query\
+            and self.engineref_list == other.engineref_list\
+            and self.categories == other.categories\
+            and self.lang == other.lang\
+            and self.safesearch == other.safesearch\
+            and self.pageno == other.pageno\
+            and self.time_range == other.time_range\
+            and self.timeout_limit == other.timeout_limit\
+            and self.external_bang == other.external_bang
def send_http_request(engine, request_params):
@@ -110,7 +126,8 @@ def send_http_request(engine, request_params):
req = requests_lib.get
else:
req = requests_lib.post
- request_args['data'] = request_params['data']
+
+ request_args['data'] = request_params['data']
# send the request
return req(request_params['url'], **request_args)
diff --git a/searx/settings.yml b/searx/settings.yml
index 54352bbfc..78ae26b97 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -17,6 +17,12 @@ server:
image_proxy : False # Proxying image results through searx
http_protocol_version : "1.0" # 1.0 and 1.1 are supported
method: "POST" # POST queries are more secure as they don't show up in history but may cause problems when using Firefox containers
+ default_http_headers:
+ X-Content-Type-Options : nosniff
+ X-XSS-Protection : 1; mode=block
+ X-Download-Options : noopen
+ X-Robots-Tag : noindex, nofollow
+ Referrer-Policy : no-referrer
ui:
static_path : "" # Custom static path - leave it blank if you didn't change
@@ -225,6 +231,20 @@ engines:
shortcut : ew
disabled : True
+# - name : elasticsearch
+# shortcut : es
+# engine : elasticsearch
+# base_url : http://localhost:9200
+# username : elastic
+# password : changeme
+# index : my-index
+# # available options: match, simple_query_string, term, terms, custom
+# query_type : match
+# # if query_type is set to custom, provide your query here
+# #custom_query_json: {"query":{"match_all": {}}}
+# #show_metadata: False
+# disabled : True
+
- name : wikidata
engine : wikidata
shortcut : wd
diff --git a/searx/webapp.py b/searx/webapp.py
index 46d547d52..326200cec 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -44,7 +44,7 @@ from urllib.parse import urlencode, urlparse, urljoin, urlsplit
from pygments import highlight
from pygments.lexers import get_lexer_by_name
-from pygments.formatters import HtmlFormatter
+from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module
from werkzeug.middleware.proxy_fix import ProxyFix
from flask import (
@@ -111,7 +111,7 @@ app = Flask(
app.jinja_env.trim_blocks = True
app.jinja_env.lstrip_blocks = True
-app.jinja_env.add_extension('jinja2.ext.loopcontrols')
+app.jinja_env.add_extension('jinja2.ext.loopcontrols') # pylint: disable=no-member
app.secret_key = settings['server']['secret_key']
# see https://flask.palletsprojects.com/en/1.1.x/cli/
@@ -488,6 +488,16 @@ def pre_request():
@app.after_request
+def add_default_headers(response):
+    """
+    Add the headers configured under ``server.default_http_headers`` to the
+    response and return it.  Headers already present on the response are
+    left untouched.
+    """
+    defaults = settings['server'].get('default_http_headers', {})
+    for name, content in defaults.items():
+        if name not in response.headers:
+            response.headers[name] = content
+    return response
+
+
+@app.after_request
def post_request(response):
total_time = time() - request.start_time
timings_all = ['total;dur=' + str(round(total_time * 1000, 3))]
@@ -537,10 +547,12 @@ def index():
# redirect to search if there's a query in the request
if request.form.get('q'):
- return redirect(url_for('search'), 308)
+ query = ('?' + request.query_string.decode()) if request.query_string else ''
+ return redirect(url_for('search') + query, 308)
return render(
'index.html',
+ selected_categories=get_selected_categories(request.preferences, request.form),
)
@@ -556,8 +568,8 @@ def search():
if output_format not in ['html', 'csv', 'json', 'rss']:
output_format = 'html'
- # check if there is query
- if request.form.get('q') is None:
+ # check if there is query (not None and not an empty string)
+ if not request.form.get('q'):
if output_format == 'html':
return render(
'index.html',
@@ -577,15 +589,12 @@ def search():
result_container = search.search()
+ except SearxParameterException as e:
+ logger.exception('search error: SearxParameterException')
+ return index_error(output_format, e.message), 400
except Exception as e:
- # log exception
logger.exception('search error')
-
- # is it an invalid input parameter or something else ?
- if (issubclass(e.__class__, SearxParameterException)):
- return index_error(output_format, e.message), 400
- else:
- return index_error(output_format, gettext('search error')), 500
+ return index_error(output_format, gettext('search error')), 500
# results
results = result_container.get_ordered_results()
diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py
index 36135913c..464a9b37d 100644
--- a/tests/unit/test_search.py
+++ b/tests/unit/test_search.py
@@ -21,6 +21,20 @@ TEST_ENGINES = [
]
+class SearchQueryTestCase(SearxTestCase):
+    """Unit tests for ``SearchQuery.__repr__`` and ``SearchQuery.__eq__``."""
+
+    def test_repr(self):
+        s = SearchQuery('test', [EngineRef('bing', 'general', False)], ['general'], 'all', 0, 1, '1', 5.0, 'g')
+        self.assertEqual(repr(s),
+                         "SearchQuery('test', [EngineRef('bing', 'general', False)], ['general'], 'all', 0, 1, '1', 5.0, 'g')")  # noqa
+
+    def test_eq(self):
+        s = SearchQuery('test', [EngineRef('bing', 'general', False)], ['general'], 'all', 0, 1, None, None, None)
+        t = SearchQuery('test', [EngineRef('google', 'general', False)], ['general'], 'all', 0, 1, None, None, None)
+        # a distinct instance built from the same values: equality must be
+        # decided by the fields, not by object identity
+        same_as_s = SearchQuery('test', [EngineRef('bing', 'general', False)], ['general'],
+                                'all', 0, 1, None, None, None)
+        self.assertEqual(s, s)
+        self.assertEqual(s, same_as_s)
+        self.assertNotEqual(s, t)
+
+
class SearchTestCase(SearxTestCase):
@classmethod
diff --git a/tests/unit/test_standalone_searx.py b/tests/unit/test_standalone_searx.py
index 6b8bdac2d..ddf140799 100644
--- a/tests/unit/test_standalone_searx.py
+++ b/tests/unit/test_standalone_searx.py
@@ -8,6 +8,7 @@ import sys
from mock import Mock, patch
from nose2.tools import params
+from searx.search import SearchQuery
from searx.testing import SearxTestCase
@@ -94,7 +95,7 @@ class StandaloneSearx(SearxTestCase):
args = sas.parse_argument(['rain', ])
search_q = sas.get_search_query(args)
self.assertTrue(search_q)
- self.assertEqual(str(search_q), 'rain;[]')
+ self.assertEqual(search_q, SearchQuery('rain', [], ['general'], 'all', 0, 1, None, None, None))
def test_no_parsed_url(self):
"""test no_parsed_url func"""
diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py
index 08a266931..75a968ad8 100644
--- a/tests/unit/test_webapp.py
+++ b/tests/unit/test_webapp.py
@@ -75,9 +75,32 @@ class ViewsTestCase(SearxTestCase):
self.assertEqual(result.status_code, 200)
self.assertIn(b'<div class="title"><h1>searx</h1></div>', result.data)
- def test_index_html(self):
+ def test_index_html_post(self):
result = self.app.post('/', data={'q': 'test'})
self.assertEqual(result.status_code, 308)
+ self.assertEqual(result.location, 'http://localhost/search')
+
+    def test_index_html_get(self):
+        """A query passed in the URL query string is kept in the redirect target."""
+        response = self.app.post('/?q=test')
+        self.assertEqual(response.status_code, 308)
+        self.assertEqual(response.location, 'http://localhost/search?q=test')
+
+    def test_search_empty_html(self):
+        """An empty query in the default format is answered with status 200 and the title markup."""
+        response = self.app.post('/search', data={'q': ''})
+        self.assertEqual(response.status_code, 200)
+        self.assertIn(b'<div class="title"><h1>searx</h1></div>', response.data)
+
+    def test_search_empty_json(self):
+        """An empty query in JSON format is answered with status 400."""
+        form = {'q': '', 'format': 'json'}
+        response = self.app.post('/search', data=form)
+        self.assertEqual(response.status_code, 400)
+
+    def test_search_empty_csv(self):
+        """An empty query in CSV format is answered with status 400."""
+        form = {'q': '', 'format': 'csv'}
+        response = self.app.post('/search', data=form)
+        self.assertEqual(response.status_code, 400)
+
+    def test_search_empty_rss(self):
+        """An empty query in RSS format is answered with status 400."""
+        form = {'q': '', 'format': 'rss'}
+        response = self.app.post('/search', data=form)
+        self.assertEqual(response.status_code, 400)
def test_search_html(self):
result = self.app.post('/search', data={'q': 'test'})