summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/elasticsearch.py 142
-rw-r--r-- searx/engines/qwant.py 2
-rw-r--r-- searx/engines/xpath.py 1
-rw-r--r-- searx/search.py 27
-rw-r--r-- searx/settings.yml 20
-rwxr-xr-x searx/webapp.py 33
6 files changed, 207 insertions, 18 deletions
diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py
new file mode 100644
index 000000000..bad65fb27
--- /dev/null
+++ b/searx/engines/elasticsearch.py
@@ -0,0 +1,142 @@
+from json import loads, dumps
+from lxml import html
+from urllib.parse import quote, urljoin
+from requests.auth import HTTPBasicAuth
+from searx.utils import extract_text, get_torrent_size
+
+
+# Engine defaults. NOTE(review): presumably overridden per engine from the
+# instance settings (settings.yml lists base_url, username, password, index,
+# query_type, custom_query_json and show_metadata) -- confirm against the loader.
+base_url = 'http://localhost:9200'
+username = ''
+password = ''
+# Name of the Elasticsearch index to search; init() rejects an empty value.
+index = ''
+# NOTE(review): evaluated once, at import time, from the defaults above; a
+# base_url/index assigned afterwards is not reflected in this string.
+search_url = base_url + '/' + index + '/_search'
+# Must be one of the keys of _available_query_types.
+query_type = 'match'
+# Query body consumed by _custom_query() when query_type is 'custom'.
+custom_query_json = {}
+# When true, response() attaches index/id/score metadata to every result.
+show_metadata = False
+categories = ['general']
+
+
+def init(engine_settings):
+    """
+    Validate the engine configuration.
+
+    Raises ValueError when engine_settings names a query type that is not a
+    key of _available_query_types, or when the module-level index is the
+    empty string.
+    """
+    if 'query_type' in engine_settings:
+        requested_type = engine_settings['query_type']
+        if requested_type not in _available_query_types:
+            raise ValueError('unsupported query type', requested_type)
+
+    if index == '':
+        raise ValueError('index cannot be empty')
+
+
+def request(query, params):
+    """
+    Fill params with the Elasticsearch search request for query.
+
+    Sets params['url'], params['method'] ('GET'), params['data'] (the JSON
+    encoded query body) and the Content-Type header, plus params['auth'] when
+    both username and password are configured. params is returned untouched
+    when query_type is not a key of _available_query_types.
+    Propagates ValueError from the query builder on a malformed query.
+    """
+    build_query = _available_query_types.get(query_type)
+    if build_query is None:
+        return params
+
+    if username and password:
+        params['auth'] = HTTPBasicAuth(username, password)
+
+    # Build the URL from the current base_url/index instead of the module-level
+    # search_url: that string is computed at import time from the defaults
+    # (empty index), so it would not point at the configured index.
+    params['url'] = base_url + '/' + index + '/_search'
+    # Elasticsearch accepts the query body on a GET request.
+    params['method'] = 'GET'
+    params['data'] = dumps(build_query(query))
+    params['headers']['Content-Type'] = 'application/json'
+
+    return params
+
+
+def _match_query(query):
+    """
+    The standard for full text queries.
+    searx format: "key:value" e.g. city:berlin
+    Only the first ":" separates the field name from the value, so the value
+    may itself contain colons (e.g. url:http://example.org).
+    Raises ValueError when the query contains no ":".
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
+    """
+
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be "key:value"')
+
+    return {"query": {"match": {key: {'query': value}}}}
+
+
+def _simple_query_string_query(query):
+    """
+    Build a simple_query_string query, a more lenient variant of query_string.
+    The whole search string is passed through unchanged; the field searched
+    can be specified in index.query.default_field in Elasticsearch.
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
+    """
+
+    clause = {'query': query}
+    return {'query': {'simple_query_string': clause}}
+
+
+def _term_query(query):
+    """
+    Accepts one term and the name of the field.
+    searx format: "key:value" e.g. city:berlin
+    Only the first ":" separates the field name from the term, so the term
+    may itself contain colons.
+    Raises ValueError when the query contains no ":".
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html
+    """
+
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value')
+
+    return {'query': {'term': {key: value}}}
+
+
+def _terms_query(query):
+    """
+    Accepts multiple terms and the name of the field.
+    searx format: "key:value1,value2" e.g. city:berlin,paris
+    Only the first ":" separates the field name from the comma separated
+    terms, so individual terms may contain colons.
+    Raises ValueError when the query contains no ":".
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
+    """
+
+    key, separator, values = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value1,value2')
+
+    return {'query': {'terms': {key: values.split(',')}}}
+
+
+def _custom_query(query):
+    """
+    Query JSON defined by the instance administrator (custom_query_json).
+    searx format: "key:value" e.g. city:berlin
+
+    Returns a copy of custom_query_json in which every dict key equal to
+    "{{KEY}}" is replaced by the key part of the query and every value equal
+    to "{{VALUE}}" by the value part, at any nesting depth.
+    custom_query_json itself is never modified, so the placeholders are still
+    present for the next query.
+    Raises ValueError when the query contains no ":".
+    """
+
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value')
+
+    return _fill_custom_query(custom_query_json, key, value)
+
+
+def _fill_custom_query(node, key, value):
+    """Return a copy of node with the {{KEY}} / {{VALUE}} placeholders substituted."""
+
+    if isinstance(node, dict):
+        # build a new dict instead of renaming keys in place: changing the
+        # keys of a dict while iterating over it is an error
+        return {(key if name == '{{KEY}}' else name): _fill_custom_query(child, key, value)
+                for name, child in node.items()}
+    if isinstance(node, list):
+        return [_fill_custom_query(child, key, value) for child in node]
+    if node == '{{VALUE}}':
+        return value
+    return node
+
+
+def response(resp):
+    """
+    Convert an Elasticsearch search response into a list of result dicts.
+
+    Each hit's _source fields become the result's entries: values are
+    converted with str() unless the field name starts with "_". Every result
+    uses the 'key-value.html' template and, when show_metadata is true,
+    carries the hit's index, id and score under 'metadata'.
+    Raises Exception with the reported error when the response has an
+    'error' member.
+    """
+    payload = loads(resp.text)
+    if 'error' in payload:
+        raise Exception(payload['error'])
+
+    results = []
+    for hit in payload['hits']['hits']:
+        entry = {}
+        for field, content in hit['_source'].items():
+            entry[field] = content if field.startswith('_') else str(content)
+        entry['template'] = 'key-value.html'
+
+        if show_metadata:
+            entry['metadata'] = {
+                'index': hit['_index'],
+                'id': hit['_id'],
+                'score': hit['_score'],
+            }
+
+        results.append(entry)
+
+    return results
+
+
+# Maps each supported query_type name to the function that builds the
+# Elasticsearch request body from the user's query string; used by init() to
+# validate the configuration and by request() to build the body.
+_available_query_types = {
+ # Full text queries
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
+ 'match': _match_query,
+ 'simple_query_string': _simple_query_string_query,
+
+ # Term-level queries
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
+ 'term': _term_query,
+ 'terms': _terms_query,
+
+ # Query JSON defined by the instance administrator.
+ 'custom': _custom_query,
+}
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 98460604c..c909ce11b 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -17,7 +17,7 @@ from searx.utils import html_to_text, match_language
# engine dependent config
-categories = None
+categories = []
paging = True
language_support = True
supported_languages_url = 'https://qwant.com/region'
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 81c2747fb..a569d9160 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -7,6 +7,7 @@ url_xpath = None
content_xpath = None
title_xpath = None
thumbnail_xpath = False
+categories = []
paging = False
suggestion_xpath = ''
results_xpath = ''
diff --git a/searx/search.py b/searx/search.py
index cd195825a..1cb2a603b 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -57,8 +57,11 @@ class EngineRef:
self.category = category
self.from_bang = from_bang
- def __str__(self):
- return "(" + self.name + "," + self.category + "," + str(self.from_bang) + ")"
+    def __repr__(self):
+        """Return a constructor-like representation: name, category, from_bang."""
+        fields = (self.name, self.category, self.from_bang)
+        return "EngineRef({!r}, {!r}, {!r})".format(*fields)
+
+    def __eq__(self, other):
+        """
+        Two references are equal when name, category and from_bang all match.
+
+        Returns NotImplemented for operands that are not EngineRef instances,
+        so comparing with an unrelated object yields False instead of raising
+        AttributeError.
+        """
+        if not isinstance(other, EngineRef):
+            return NotImplemented
+        return (self.name == other.name
+                and self.category == other.category
+                and self.from_bang == other.from_bang)
class SearchQuery:
@@ -87,8 +90,21 @@ class SearchQuery:
self.timeout_limit = timeout_limit
self.external_bang = external_bang
- def __str__(self):
- return self.query + ";" + str(self.engineref_list)
+    def __repr__(self):
+        """Return a constructor-like representation listing the nine query fields."""
+        fields = (
+            self.query, self.engineref_list, self.categories,
+            self.lang, self.safesearch, self.pageno,
+            self.time_range, self.timeout_limit, self.external_bang,
+        )
+        return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format(*fields)
+
+    def __eq__(self, other):
+        """
+        Two queries are equal when all nine fields shown by __repr__ match.
+
+        Returns NotImplemented for operands that are not SearchQuery
+        instances, so comparing with an unrelated object yields False instead
+        of raising AttributeError.
+        """
+        if not isinstance(other, SearchQuery):
+            return NotImplemented
+        # categories must be compared against the other operand; comparing
+        # self.categories with itself would make differing categories equal
+        return (self.query == other.query
+                and self.engineref_list == other.engineref_list
+                and self.categories == other.categories
+                and self.lang == other.lang
+                and self.safesearch == other.safesearch
+                and self.pageno == other.pageno
+                and self.time_range == other.time_range
+                and self.timeout_limit == other.timeout_limit
+                and self.external_bang == other.external_bang)
def send_http_request(engine, request_params):
@@ -110,7 +126,8 @@ def send_http_request(engine, request_params):
req = requests_lib.get
else:
req = requests_lib.post
- request_args['data'] = request_params['data']
+
+ request_args['data'] = request_params['data']
# send the request
return req(request_params['url'], **request_args)
diff --git a/searx/settings.yml b/searx/settings.yml
index 54352bbfc..78ae26b97 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -17,6 +17,12 @@ server:
image_proxy : False # Proxying image results through searx
http_protocol_version : "1.0" # 1.0 and 1.1 are supported
method: "POST" # POST queries are more secure as they don't show up in history but may cause problems when using Firefox containers
+ default_http_headers:
+ X-Content-Type-Options : nosniff
+ X-XSS-Protection : 1; mode=block
+ X-Download-Options : noopen
+ X-Robots-Tag : noindex, nofollow
+ Referrer-Policy : no-referrer
ui:
static_path : "" # Custom static path - leave it blank if you didn't change
@@ -225,6 +231,20 @@ engines:
shortcut : ew
disabled : True
+# - name : elasticsearch
+# shortcut : es
+# engine : elasticsearch
+# base_url : http://localhost:9200
+# username : elastic
+# password : changeme
+# index : my-index
+# # available options: match, simple_query_string, term, terms, custom
+# query_type : match
+# # if query_type is set to custom, provide your query here
+# #custom_query_json: {"query":{"match_all": {}}}
+# #show_metadata: False
+# disabled : True
+
- name : wikidata
engine : wikidata
shortcut : wd
diff --git a/searx/webapp.py b/searx/webapp.py
index 46d547d52..326200cec 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -44,7 +44,7 @@ from urllib.parse import urlencode, urlparse, urljoin, urlsplit
from pygments import highlight
from pygments.lexers import get_lexer_by_name
-from pygments.formatters import HtmlFormatter
+from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module
from werkzeug.middleware.proxy_fix import ProxyFix
from flask import (
@@ -111,7 +111,7 @@ app = Flask(
app.jinja_env.trim_blocks = True
app.jinja_env.lstrip_blocks = True
-app.jinja_env.add_extension('jinja2.ext.loopcontrols')
+app.jinja_env.add_extension('jinja2.ext.loopcontrols') # pylint: disable=no-member
app.secret_key = settings['server']['secret_key']
# see https://flask.palletsprojects.com/en/1.1.x/cli/
@@ -488,6 +488,16 @@ def pre_request():
@app.after_request
+def add_default_headers(response):
+    """
+    Add the configured default HTTP headers to the response.
+
+    Headers come from settings['server']['default_http_headers'] (none when
+    the key is absent). A header already present on the response is left
+    untouched. Returns the same response object.
+    """
+    default_headers = settings['server'].get('default_http_headers', {})
+    for name, content in default_headers.items():
+        if name not in response.headers:
+            response.headers[name] = content
+    return response
+
+
+@app.after_request
def post_request(response):
total_time = time() - request.start_time
timings_all = ['total;dur=' + str(round(total_time * 1000, 3))]
@@ -537,10 +547,12 @@ def index():
# redirect to search if there's a query in the request
if request.form.get('q'):
- return redirect(url_for('search'), 308)
+ query = ('?' + request.query_string.decode()) if request.query_string else ''
+ return redirect(url_for('search') + query, 308)
return render(
'index.html',
+ selected_categories=get_selected_categories(request.preferences, request.form),
)
@@ -556,8 +568,8 @@ def search():
if output_format not in ['html', 'csv', 'json', 'rss']:
output_format = 'html'
- # check if there is query
- if request.form.get('q') is None:
+ # check if there is query (not None and not an empty string)
+ if not request.form.get('q'):
if output_format == 'html':
return render(
'index.html',
@@ -577,15 +589,12 @@ def search():
result_container = search.search()
+ except SearxParameterException as e:
+ logger.exception('search error: SearxParameterException')
+ return index_error(output_format, e.message), 400
except Exception as e:
- # log exception
logger.exception('search error')
-
- # is it an invalid input parameter or something else ?
- if (issubclass(e.__class__, SearxParameterException)):
- return index_error(output_format, e.message), 400
- else:
- return index_error(output_format, gettext('search error')), 500
+ return index_error(output_format, gettext('search error')), 500
# results
results = result_container.get_ordered_results()