diff options (diffstat)

 -rw-r--r--  searx/__init__.py           |  2
 -rw-r--r--  searx/autocomplete.py       |  2
 -rw-r--r--  searx/engines/__init__.py   |  2
 -rw-r--r--  searx/engines/startpage.py  | 10
 -rw-r--r--  searx/poolrequests.py       |  8
 -rw-r--r--  searx/search.py             | 11
 -rw-r--r--  searx/settings.yml          | 31
 -rw-r--r--  searx/settings_robot.yml    | 22
 -rw-r--r--  searx/utils.py              |  3
 -rw-r--r--  searx/webapp.py             | 35

10 files changed, 81 insertions, 45 deletions
diff --git a/searx/__init__.py b/searx/__init__.py index 2d545a809..ea21e8f13 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -40,7 +40,7 @@ else: with open(settings_path) as settings_yaml: settings = load(settings_yaml) -if settings.get('server', {}).get('debug'): +if settings.get('general', {}).get('debug'): logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.WARNING) diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 1a324b8a9..264d0cc1f 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -29,7 +29,7 @@ from searx.poolrequests import get as http_get def get(*args, **kwargs): if 'timeout' not in kwargs: - kwargs['timeout'] = settings['server']['request_timeout'] + kwargs['timeout'] = settings['outgoing']['request_timeout'] return http_get(*args, **kwargs) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 42e1f08bc..447138d3b 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -75,7 +75,7 @@ def load_engine(engine_data): engine.safesearch = False if not hasattr(engine, 'timeout'): - engine.timeout = settings['server']['request_timeout'] + engine.timeout = settings['outgoing']['request_timeout'] if not hasattr(engine, 'shortcut'): engine.shortcut = '' diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 9d5b4befe..7d58f7f01 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -66,7 +66,15 @@ def response(resp): url = link.attrib.get('href') # block google-ad url's - if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url): + if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url): + continue + + # block startpage search url's + if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): + continue + + # block ixquick search url's + if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url): continue title = escape(extract_text(link)) diff --git a/searx/poolrequests.py 
b/searx/poolrequests.py index e2a757665..c44bdc7e2 100644 --- a/searx/poolrequests.py +++ b/searx/poolrequests.py @@ -39,11 +39,11 @@ class HTTPAdapterWithConnParams(requests.adapters.HTTPAdapter): block=self._pool_block, **self._conn_params) -if settings.get('source_ips'): +if settings['outgoing'].get('source_ips'): http_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=100, source_address=(source_ip, 0)) - for source_ip in settings['source_ips']) + for source_ip in settings['outgoing']['source_ips']) https_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=100, source_address=(source_ip, 0)) - for source_ip in settings['source_ips']) + for source_ip in settings['outgoing']['source_ips']) else: http_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=100), )) https_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=100), )) @@ -69,7 +69,7 @@ def request(method, url, **kwargs): """same as requests/requests/api.py request(...) except it use SessionSinglePool and force proxies""" global settings session = SessionSinglePool() - kwargs['proxies'] = settings.get('outgoing_proxies', None) + kwargs['proxies'] = settings['outgoing'].get('proxies', None) response = session.request(method=method, url=url, **kwargs) session.close() return response diff --git a/searx/search.py b/searx/search.py index bb440352b..1bf05f7f9 100644 --- a/searx/search.py +++ b/searx/search.py @@ -23,6 +23,7 @@ from operator import itemgetter from Queue import Queue from time import time from urlparse import urlparse, unquote +from searx import settings from searx.engines import ( categories, engines ) @@ -205,6 +206,10 @@ def score_results(results): # if there is no duplicate found, append result else: res['score'] = score + # if the result has no scheme, use http as default + if res['parsed_url'].scheme == '': + res['parsed_url'] = res['parsed_url']._replace(scheme="http") + results.append(res) results = sorted(results, key=itemgetter('score'), reverse=True) 
@@ -480,9 +485,9 @@ class Search(object): try: # 0 = None, 1 = Moderate, 2 = Strict - request_params['safesearch'] = int(request.cookies.get('safesearch', 1)) - except ValueError: - request_params['safesearch'] = 1 + request_params['safesearch'] = int(request.cookies.get('safesearch')) + except Exception: + request_params['safesearch'] = settings['search']['safe_search'] # update request parameters dependent on # search-engine (contained in engines folder) diff --git a/searx/settings.yml b/searx/settings.yml index 03d895363..ffc3044a5 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1,27 +1,36 @@ +general: + debug : False # Debug mode, only for development + +search: + safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict + autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default + server: port : 8888 + bind_address : "127.0.0.1" # address to listen on secret_key : "ultrasecretkey" # change this! - debug : False # Debug mode, only for development - request_timeout : 2.0 # seconds base_url : False # Set custom base_url. 
Possible values: False or "https://your.custom.host/location/" + image_proxy : False # Proxying image results through searx + +ui: themes_path : "" # Custom ui themes path - leave it blank if you didn't change default_theme : oscar # ui theme - useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator - image_proxy : False # Proxying image results through searx default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section +outgoing: # communication with search engines + request_timeout : 2.0 # seconds + useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator # uncomment below section if you want to use a proxy # see http://docs.python-requests.org/en/latest/user/advanced/#proxies # SOCKS proxies are not supported : see https://github.com/kennethreitz/requests/pull/478 -#outgoing_proxies : -# http : http://127.0.0.1:8080 -# https: http://127.0.0.1:8080 - +# proxies : +# http : http://127.0.0.1:8080 +# https: http://127.0.0.1:8080 # uncomment below section only if you have more than one network interface # which can be the source of outgoing search requests -#source_ips: -# - 1.1.1.1 -# - 1.1.1.2 +# source_ips: +# - 1.1.1.1 +# - 1.1.1.2 engines: - name : wikipedia diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml index c6fe2282f..f14443cf5 100644 --- a/searx/settings_robot.yml +++ b/searx/settings_robot.yml @@ -1,13 +1,25 @@ +general: + debug : False + +search: + safe_search : 0 + autocomplete : 0 + server: port : 11111 + bind_address : 127.0.0.1 secret_key : "ultrasecretkey" # change this! 
- debug : False - request_timeout : 3.0 # seconds - base_url: False + base_url : False + image_proxy : False + +ui: themes_path : "" default_theme : default - https_rewrite : True - image_proxy : False + default_locale : "" + +outgoing: + request_timeout : 1.0 # seconds + useragent_suffix : "" engines: - name : general_dummy diff --git a/searx/utils.py b/searx/utils.py index c9784159c..cc31726b6 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -26,6 +26,7 @@ ua_versions = ('33.0', ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64', 'X11; Linux x86') + ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}" blocked_tags = ('script', @@ -40,7 +41,7 @@ def gen_useragent(): def searx_useragent(): return 'searx/{searx_version} {suffix}'.format( searx_version=VERSION_STRING, - suffix=settings['server'].get('useragent_suffix', '')) + suffix=settings['outgoing'].get('useragent_suffix', '')) def highlight_content(content, query): diff --git a/searx/webapp.py b/searx/webapp.py index fb7157b47..7f1621a6a 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -77,11 +77,11 @@ except ImportError: static_path, templates_path, themes =\ - get_themes(settings['themes_path'] - if settings.get('themes_path') + get_themes(settings['ui']['themes_path'] + if settings['ui']['themes_path'] else searx_dir) -default_theme = settings['server'].get('default_theme', 'default') +default_theme = settings['ui']['default_theme'] static_files = get_static_files(searx_dir) @@ -121,15 +121,15 @@ _category_names = (gettext('files'), gettext('news'), gettext('map')) -outgoing_proxies = settings.get('outgoing_proxies', None) +outgoing_proxies = settings['outgoing'].get('proxies', None) @babel.localeselector def get_locale(): locale = request.accept_languages.best_match(settings['locales'].keys()) - if settings['server'].get('default_locale'): - locale = settings['server']['default_locale'] + if settings['ui'].get('default_locale'): + locale = settings['ui']['default_locale'] if 
request.cookies.get('locale', '') in settings['locales']: locale = request.cookies.get('locale', '') @@ -263,7 +263,7 @@ def image_proxify(url): def render(template_name, override_theme=None, **kwargs): blocked_engines = get_blocked_engines(engines, request.cookies) - autocomplete = request.cookies.get('autocomplete') + autocomplete = request.cookies.get('autocomplete', settings['search']['autocomplete']) if autocomplete not in autocomplete_backends: autocomplete = None @@ -312,7 +312,7 @@ def render(template_name, override_theme=None, **kwargs): kwargs['method'] = request.cookies.get('method', 'POST') - kwargs['safesearch'] = request.cookies.get('safesearch', '1') + kwargs['safesearch'] = request.cookies.get('safesearch', str(settings['search']['safe_search'])) # override url_for function in templates kwargs['url_for'] = url_for_theme @@ -491,7 +491,7 @@ def autocompleter(): return '', 400 # run autocompleter - completer = autocomplete_backends.get(request.cookies.get('autocomplete')) + completer = autocomplete_backends.get(request.cookies.get('autocomplete', settings['search']['autocomplete'])) # parse searx specific autocompleter results like !bang raw_results = searx_bang(query) @@ -542,7 +542,7 @@ def preferences(): locale = None autocomplete = '' method = 'POST' - safesearch = '1' + safesearch = settings['search']['safe_search'] for pd_name, pd in request.form.items(): if pd_name.startswith('category_'): category = pd_name[9:] @@ -624,7 +624,7 @@ def preferences(): resp.set_cookie('method', method, max_age=cookie_max_age) - resp.set_cookie('safesearch', safesearch, max_age=cookie_max_age) + resp.set_cookie('safesearch', str(safesearch), max_age=cookie_max_age) resp.set_cookie('image_proxy', image_proxy, max_age=cookie_max_age) @@ -640,12 +640,12 @@ def preferences(): stats[e.name] = {'time': None, 'warn_timeout': False, 'warn_time': False} - if e.timeout > settings['server']['request_timeout']: + if e.timeout > settings['outgoing']['request_timeout']: 
stats[e.name]['warn_timeout'] = True for engine_stat in get_engines_stats()[0][1]: stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3) - if engine_stat.get('avg') > settings['server']['request_timeout']: + if engine_stat.get('avg') > settings['outgoing']['request_timeout']: stats[engine_stat.get('name')]['warn_time'] = True # end of stats @@ -683,7 +683,7 @@ def image_proxy(): resp = requests.get(url, stream=True, - timeout=settings['server'].get('request_timeout', 2), + timeout=settings['outgoing']['request_timeout'], headers=headers, proxies=outgoing_proxies) @@ -775,9 +775,10 @@ def clear_cookies(): def run(): app.run( - debug=settings['server']['debug'], - use_debugger=settings['server']['debug'], - port=settings['server']['port'] + debug=settings['general']['debug'], + use_debugger=settings['general']['debug'], + port=settings['server']['port'], + host=settings['server']['bind_address'] ) |