From b8cd3264644208d7afa1a239f829222d45226334 Mon Sep 17 00:00:00 2001
From: Alexandre Flament <alex@al-f.net>
Date: Thu, 25 Feb 2021 17:42:52 +0100
Subject: Add searx_extra package

Split the utils directory into:
* searx_extra contains update scripts, standalone_searx.py
* utils contains the files to build and setup searx.
---
 .github/workflows/data-update.yml                |  16 +-
 Makefile                                         |   4 +-
 docs/index.rst                                   |   1 +
 docs/searx_extra/index.rst                       |  14 ++
 docs/searx_extra/standalone_searx.py.rst         |   9 +
 docs/utils/index.rst                             |  10 +-
 docs/utils/standalone_searx.py.rst               |  11 --
 searx_extra/__init__.py                          |   0
 searx_extra/google_search.py                     |  35 ++++
 searx_extra/standalone_searx.py                  | 217 +++++++++++++++++++++++
 searx_extra/update/__init__.py                   |   0
 searx_extra/update/update_ahmia_blacklist.py     |  30 ++++
 searx_extra/update/update_currencies.py          | 150 ++++++++++++++++
 searx_extra/update/update_engine_descriptions.py | 202 +++++++++++++++++++++
 searx_extra/update/update_external_bangs.py      | 157 ++++++++++++++++
 searx_extra/update/update_firefox_version.py     |  68 +++++++
 searx_extra/update/update_languages.py           | 205 +++++++++++++++++++++
 searx_extra/update/update_translations.sh        |  15 ++
 searx_extra/update/update_wikidata_units.py      |  54 ++++++
 setup.py                                         |   2 +-
 tests/unit/test_standalone_searx.py              |  20 +--
 utils/fetch_ahmia_blacklist.py                   |  33 ----
 utils/fetch_currencies.py                        | 151 ----------------
 utils/fetch_engine_descriptions.py               | 206 ---------------------
 utils/fetch_external_bangs.py                    | 161 -----------------
 utils/fetch_firefox_version.py                   |  73 --------
 utils/fetch_languages.py                         | 207 ---------------------
 utils/fetch_wikidata_units.py                    |  56 ------
 utils/google_search.py                           |  35 ----
 utils/standalone_searx.py                        | 217 -----------------------
 utils/update-translations.sh                     |  15 --
 31 files changed, 1173 insertions(+), 1201 deletions(-)
 create mode 100644 docs/searx_extra/index.rst
 create mode 100644 docs/searx_extra/standalone_searx.py.rst
 delete mode 100644 docs/utils/standalone_searx.py.rst
 create mode 100644 searx_extra/__init__.py
 create mode 100644 searx_extra/google_search.py
 create mode 100755 searx_extra/standalone_searx.py
 create mode 100644 searx_extra/update/__init__.py
 create mode 100755 searx_extra/update/update_ahmia_blacklist.py
 create mode 100755 searx_extra/update/update_currencies.py
 create mode 100755 searx_extra/update/update_engine_descriptions.py
 create mode 100755 searx_extra/update/update_external_bangs.py
 create mode 100755 searx_extra/update/update_firefox_version.py
 create mode 100755 searx_extra/update/update_languages.py
 create mode 100755 searx_extra/update/update_translations.sh
 create mode 100755 searx_extra/update/update_wikidata_units.py
 delete mode 100755 utils/fetch_ahmia_blacklist.py
 delete mode 100644 utils/fetch_currencies.py
 delete mode 100644 utils/fetch_engine_descriptions.py
 delete mode 100755 utils/fetch_external_bangs.py
 delete mode 100755 utils/fetch_firefox_version.py
 delete mode 100644 utils/fetch_languages.py
 delete mode 100644 utils/fetch_wikidata_units.py
 delete mode 100644 utils/google_search.py
 delete mode 100755 utils/standalone_searx.py
 delete mode 100755 utils/update-translations.sh

diff --git a/.github/workflows/data-update.yml b/.github/workflows/data-update.yml
index c9c6b29a4..eb9bed8c8 100644
--- a/.github/workflows/data-update.yml
+++ b/.github/workflows/data-update.yml
@@ -11,12 +11,12 @@ jobs:
     strategy:
       matrix:
         fetch:
-          - ahmia_blacklist
-          - currencies
-          - external_bangs
-          - firefox_version
-          - languages
-          - wikidata_units
+          - update_ahmia_blacklist.py
+          - update_currencies.py
+          - update_external_bangs.py
+          - update_firefox_version.py
+          - update_languages.py
+          - update_wikidata_units.py
     steps:
       - name: Checkout
         uses: actions/checkout@v2
@@ -45,10 +45,10 @@ jobs:
 
       - name: Fetch data
         env:
-          FETCH_SCRIPT: utils/fetch_${{ matrix.fetch }}.py
+          FETCH_SCRIPT: ./searx_extra/update/${{ matrix.fetch }}
         run: |
           source local/py3/bin/activate
-          python $FETCH_SCRIPT
+          $FETCH_SCRIPT
 
       - name: Create Pull Request
         id: cpr
diff --git a/Makefile b/Makefile
index 4e451b7ca..9917da78b 100644
--- a/Makefile
+++ b/Makefile
@@ -195,8 +195,8 @@ PYLINT_FILES=\
 	searx/engines/google_videos.py \
 	searx/engines/google_images.py \
 	searx/engines/mediathekviewweb.py \
-	utils/fetch_external_bangs.py \
-	searx/engines/google_scholar.py
+	searx/engines/google_scholar.py \
+	searx_extra/update/update_external_bangs.py
 
 test.pylint: pyenvinstall
 	$(call cmd,pylint,$(PYLINT_FILES))
diff --git a/docs/index.rst b/docs/index.rst
index 9e590867c..a406da197 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -30,6 +30,7 @@ anyone, you can set up your own, see :ref:`installation`.
    user/index
    admin/index
    dev/index
+   searx_extra/index
    utils/index
    blog/index
 
diff --git a/docs/searx_extra/index.rst b/docs/searx_extra/index.rst
new file mode 100644
index 000000000..64d0b9047
--- /dev/null
+++ b/docs/searx_extra/index.rst
@@ -0,0 +1,14 @@
+.. _searx_extra:
+
+======================================================
+Tooling box ``searx_extra`` for developers and users
+======================================================
+
+In the folder :origin:`searx_extra/` we maintain some tools useful for
+developers and users.
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents
+
+   standalone_searx.py
diff --git a/docs/searx_extra/standalone_searx.py.rst b/docs/searx_extra/standalone_searx.py.rst
new file mode 100644
index 000000000..ff4b53387
--- /dev/null
+++ b/docs/searx_extra/standalone_searx.py.rst
@@ -0,0 +1,9 @@
+
+.. _standalone_searx.py:
+
+===================================
+``searx_extra/standalone_searx.py``
+===================================
+
+.. automodule:: searx_extra.standalone_searx
+  :members:
diff --git a/docs/utils/index.rst b/docs/utils/index.rst
index 28515318f..32baa5704 100644
--- a/docs/utils/index.rst
+++ b/docs/utils/index.rst
@@ -1,12 +1,11 @@
 .. _searx_utils:
 .. _toolboxing:
 
-=======================
-Tooling box ``utils/*``
-=======================
+========================================
+Tooling box ``utils`` for administrators
+========================================
 
-In the folder :origin:`utils/` we maintain some tools useful for admins and
-developers.
+In the folder :origin:`utils/` we maintain some tools useful for administrators.
 
 .. toctree::
    :maxdepth: 2
@@ -16,7 +15,6 @@ developers.
    filtron.sh
    morty.sh
    lxc.sh
-   standalone_searx.py
 
 .. _toolboxing common:
 
diff --git a/docs/utils/standalone_searx.py.rst b/docs/utils/standalone_searx.py.rst
deleted file mode 100644
index 557c4b75b..000000000
--- a/docs/utils/standalone_searx.py.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-
-.. _standalone_searx.py:
-
-=============================
-``utils/standalone_searx.py``
-=============================
-
-.. automodule:: standalone_searx
-  :members:
-
-
diff --git a/searx_extra/__init__.py b/searx_extra/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/searx_extra/google_search.py b/searx_extra/google_search.py
new file mode 100644
index 000000000..cad32eeca
--- /dev/null
+++ b/searx_extra/google_search.py
@@ -0,0 +1,35 @@
+from sys import argv, exit
+
+if not len(argv) > 1:
+    print('search query required')
+    exit(1)
+
+import requests
+from json import dumps
+from searx.engines import google
+from searx.search import default_request_params
+
+request_params = default_request_params()
+# Possible params
+# request_params['headers']['User-Agent'] = ''
+# request_params['category'] = ''
+request_params['pageno'] = 1
+request_params['language'] = 'en_us'
+request_params['time_range'] = ''
+
+params = google.request(argv[1], request_params)
+
+request_args = dict(
+    headers=request_params['headers'],
+    cookies=request_params['cookies'],
+)
+
+if request_params['method'] == 'GET':
+    req = requests.get
+else:
+    req = requests.post
+    request_args['data'] = request_params['data']
+
+resp = req(request_params['url'], **request_args)
+resp.search_params = request_params
+print(dumps(google.response(resp)))
diff --git a/searx_extra/standalone_searx.py b/searx_extra/standalone_searx.py
new file mode 100755
index 000000000..f52b7e80c
--- /dev/null
+++ b/searx_extra/standalone_searx.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+"""Script to run searx from terminal.
+
+Getting categories without initializing the engines will only return `['general']`
+
+>>> import searx.engines
+... list(searx.engines.categories.keys())
+['general']
+>>> import searx.search
+... searx.search.initialize()
+... list(searx.engines.categories.keys())
+['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
+
+Example to use this script:
+
+.. code::  bash
+
+    $ python3 searx_extra/standalone_searx.py rain
+
+Example to run it from python:
+
+>>> import importlib
+... import json
+... import sys
+... import searx.engines
+... import searx.search
+... search_query = 'rain'
+... # initialize engines
+... searx.search.initialize()
+... # load engines categories once instead of each time the function called
+... engine_cs = list(searx.engines.categories.keys())
+... # load module
+... spec = importlib.util.spec_from_file_location(
+...     'searx_extra.standalone_searx', 'searx_extra/standalone_searx.py')
+... sas = importlib.util.module_from_spec(spec)
+... spec.loader.exec_module(sas)
+... # use function from module
+... prog_args = sas.parse_argument([search_query], category_choices=engine_cs)
+... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs)
+... res_dict = sas.to_dict(search_q)
+... sys.stdout.write(json.dumps(
+...     res_dict, sort_keys=True, indent=4, ensure_ascii=False,
+...     default=sas.json_serial))
+{
+    "answers": [],
+    "infoboxes": [ {...} ],
+    "paging": true,
+    "results": [... ],
+    "results_number": 820000000.0,
+    "search": {
+        "lang": "all",
+        "pageno": 1,
+        "q": "rain",
+        "safesearch": 0,
+        "timerange": null
+    },
+    "suggestions": [...]
+}
+"""  # noqa: E501
+# pylint: disable=pointless-string-statement
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2016- by Alexandre Flament, <alex@al-f.net>
+'''
+# pylint: disable=wrong-import-position
+import argparse
+import sys
+from datetime import datetime
+from json import dumps
+from typing import Any, Dict, List, Optional
+
+import searx
+import searx.preferences
+import searx.query
+import searx.search
+import searx.webadapter
+
+EngineCategoriesVar = Optional[List[str]]
+
+
+def get_search_query(
+        args: argparse.Namespace, engine_categories: EngineCategoriesVar = None
+) -> searx.search.SearchQuery:
+    """Get  search results for the query"""
+    if engine_categories is None:
+        engine_categories = list(searx.engines.categories.keys())
+    try:
+        category = args.category.decode('utf-8')
+    except AttributeError:
+        category = args.category
+    form = {
+        "q": args.query,
+        "categories": category,
+        "pageno": str(args.pageno),
+        "language": args.lang,
+        "time_range": args.timerange
+    }
+    preferences = searx.preferences.Preferences(
+        ['oscar'], engine_categories, searx.engines.engines, [])
+    preferences.key_value_settings['safesearch'].parse(args.safesearch)
+
+    search_query = searx.webadapter.get_search_query_from_webapp(
+        preferences, form)[0]
+    return search_query
+
+
+def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Remove parsed url from dict."""
+    for result in results:
+        del result['parsed_url']
+    return results
+
+
+def json_serial(obj: Any) -> Any:
+    """JSON serializer for objects not serializable by default json code.
+
+    :raise TypeError: raised when **obj** is not serializable
+    """
+    if isinstance(obj, datetime):
+        serial = obj.isoformat()
+        return serial
+    if isinstance(obj, bytes):
+        return obj.decode('utf8')
+    if isinstance(obj, set):
+        return list(obj)
+    raise TypeError("Type ({}) not serializable".format(type(obj)))
+
+
+def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
+    """Get result from parsed arguments."""
+    result_container = searx.search.Search(search_query).search()
+    result_container_json = {
+        "search": {
+            "q": search_query.query,
+            "pageno": search_query.pageno,
+            "lang": search_query.lang,
+            "safesearch": search_query.safesearch,
+            "timerange": search_query.time_range,
+        },
+        "results": no_parsed_url(result_container.get_ordered_results()),
+        "infoboxes": result_container.infoboxes,
+        "suggestions": list(result_container.suggestions),
+        "answers": list(result_container.answers),
+        "paging": result_container.paging,
+        "results_number": result_container.results_number()
+    }
+    return result_container_json
+
+
+def parse_argument(
+        args: Optional[List[str]]=None,
+        category_choices: EngineCategoriesVar=None
+) -> argparse.Namespace:
+    """Parse command line.
+
+    :raise SystemExit: Query argument required on `args`
+
+    Examples:
+
+    >>> import importlib
+    ... # load module
+    ... spec = importlib.util.spec_from_file_location(
+    ...     'searx_extra.standalone_searx', 'searx_extra/standalone_searx.py')
+    ... sas = importlib.util.module_from_spec(spec)
+    ... spec.loader.exec_module(sas)
+    ... sas.parse_argument()
+    usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]]
+                     query
+    SystemExit: 2
+    >>> sas.parse_argument(['rain'])
+    Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None)
+    """  # noqa: E501
+    if not category_choices:
+        category_choices = list(searx.engines.categories.keys())
+    parser = argparse.ArgumentParser(description='Standalone searx.')
+    parser.add_argument('query', type=str,
+                        help='Text query')
+    parser.add_argument('--category', type=str, nargs='?',
+                        choices=category_choices,
+                        default='general',
+                        help='Search category')
+    parser.add_argument('--lang', type=str, nargs='?', default='all',
+                        help='Search language')
+    parser.add_argument('--pageno', type=int, nargs='?', default=1,
+                        help='Page number starting from 1')
+    parser.add_argument(
+        '--safesearch', type=str, nargs='?',
+        choices=['0', '1', '2'], default='0',
+        help='Safe content filter from none to strict')
+    parser.add_argument(
+        '--timerange', type=str,
+        nargs='?', choices=['day', 'week', 'month', 'year'],
+        help='Filter by time range')
+    return parser.parse_args(args)
+
+
+if __name__ == '__main__':
+    searx.search.initialize()
+    engine_cs = list(searx.engines.categories.keys())
+    prog_args = parse_argument(category_choices=engine_cs)
+    search_q = get_search_query(prog_args, engine_categories=engine_cs)
+    res_dict = to_dict(search_q)
+    sys.stdout.write(dumps(
+        res_dict, sort_keys=True, indent=4, ensure_ascii=False,
+        default=json_serial))
diff --git a/searx_extra/update/__init__.py b/searx_extra/update/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/searx_extra/update/update_ahmia_blacklist.py b/searx_extra/update/update_ahmia_blacklist.py
new file mode 100755
index 000000000..f645880e6
--- /dev/null
+++ b/searx_extra/update/update_ahmia_blacklist.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+# This script saves Ahmia's blacklist for onion sites.
+# More info in https://ahmia.fi/blacklist/
+
+# set path
+from os.path import join
+
+import requests
+from searx import searx_dir
+
+URL = 'https://ahmia.fi/blacklist/banned/'
+
+
+def fetch_ahmia_blacklist():
+    """Download URL and return the whitespace-separated entries of the response body as a list."""
+    resp = requests.get(URL, timeout=3.0)
+    if resp.status_code != 200:
+        # status_code is an int: convert it, otherwise the concatenation raises TypeError
+        raise Exception("Error fetching Ahmia blacklist, HTTP code " + str(resp.status_code))
+    return resp.text.split()
+
+
+def get_ahmia_blacklist_filename():
+    return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
+
+
+blacklist = fetch_ahmia_blacklist()
+with open(get_ahmia_blacklist_filename(), "w") as f:
+    f.write('\n'.join(blacklist))
diff --git a/searx_extra/update/update_currencies.py b/searx_extra/update/update_currencies.py
new file mode 100755
index 000000000..0cfb7a951
--- /dev/null
+++ b/searx_extra/update/update_currencies.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+
+import re
+import unicodedata
+import json
+
+# set path
+from sys import path
+from os.path import realpath, dirname, join
+
+from searx import searx_dir, settings
+from searx.engines.wikidata import send_wikidata_query
+
+
+# ORDER BY (with all the query fields) is important to keep a deterministic result order
+# so multiple invocations of this script don't change currencies.json
+SARQL_REQUEST = """
+SELECT DISTINCT ?iso4217 ?unit ?unicode ?label ?alias WHERE {
+  ?item wdt:P498 ?iso4217; rdfs:label ?label.
+  OPTIONAL { ?item skos:altLabel ?alias FILTER (LANG (?alias) = LANG(?label)). }
+  OPTIONAL { ?item wdt:P5061 ?unit. }
+  OPTIONAL { ?item wdt:P489 ?symbol.
+             ?symbol wdt:P487 ?unicode. }
+  MINUS { ?item wdt:P582 ?end_data . }                  # Ignore monney with an end date
+  MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . }      # Ignore "former entity" (obsolete currency)
+  FILTER(LANG(?label) IN (%LANGUAGES_SPARQL%)).
+}
+ORDER BY ?iso4217 ?unit ?unicode ?label ?alias
+"""
+
+# ORDER BY (with all the query fields) is important to keep a deterministic result order
+# so multiple invocations of this script don't change currencies.json
+SPARQL_WIKIPEDIA_NAMES_REQUEST = """
+SELECT DISTINCT ?iso4217 ?article_name WHERE {
+  ?item wdt:P498 ?iso4217 .
+  ?article schema:about ?item ;
+           schema:name ?article_name ;
+           schema:isPartOf [ wikibase:wikiGroup "wikipedia" ]
+  MINUS { ?item wdt:P582 ?end_data . }                  # Ignore monney with an end date
+  MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . }      # Ignore "former entity" (obsolete currency)
+  FILTER(LANG(?article_name) IN (%LANGUAGES_SPARQL%)).
+}
+ORDER BY ?iso4217 ?article_name
+"""
+
+
+LANGUAGES = settings['locales'].keys()
+LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
+
+
+def remove_accents(name):
+    return unicodedata.normalize('NFKD', name).lower()
+
+
+def remove_extra(name):
+    for c in ('(', ':'):
+        if c in name:
+            name = name.split(c)[0].strip()
+    return name
+
+
+def _normalize_name(name):
+    name = re.sub(' +', ' ', remove_accents(name.lower()).replace('-', ' '))
+    name = remove_extra(name)
+    return name
+
+
+def add_currency_name(db, name, iso4217, normalize_name=True):
+    db_names = db['names']
+
+    if normalize_name:
+        name = _normalize_name(name)
+
+    iso4217_set = db_names.setdefault(name, [])
+    if iso4217 not in iso4217_set:
+        iso4217_set.insert(0, iso4217)
+
+
+def add_currency_label(db, label, iso4217, language):
+    labels = db['iso4217'].setdefault(iso4217, {})
+    labels[language] = label
+
+
+def wikidata_request_result_iterator(request):
+    result = send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    if result is not None:
+        for r in result['results']['bindings']:
+            yield r
+
+
+def fetch_db():
+    db = {
+        'names': {},
+        'iso4217': {},
+    }
+
+    for r in wikidata_request_result_iterator(SPARQL_WIKIPEDIA_NAMES_REQUEST):
+        iso4217 = r['iso4217']['value']
+        article_name = r['article_name']['value']
+        article_lang = r['article_name']['xml:lang']
+        add_currency_name(db, article_name, iso4217)
+        add_currency_label(db, article_name, iso4217, article_lang)
+
+    for r in wikidata_request_result_iterator(SARQL_REQUEST):
+        iso4217 = r['iso4217']['value']
+        if 'label' in r:
+            label = r['label']['value']
+            label_lang = r['label']['xml:lang']
+            add_currency_name(db, label, iso4217)
+            add_currency_label(db, label, iso4217, label_lang)
+
+        if 'alias' in r:
+            add_currency_name(db, r['alias']['value'], iso4217)
+
+        if 'unicode' in r:
+            add_currency_name(db, r['unicode']['value'], iso4217, normalize_name=False)
+
+        if 'unit' in r:
+            add_currency_name(db, r['unit']['value'], iso4217, normalize_name=False)
+
+    # reduce memory usage:
+    # replace lists with one item by the item.
+    # see searx.search.processors.online_currency.name_to_iso4217
+    for name in db['names']:
+        if len(db['names'][name]) == 1:
+            db['names'][name] = db['names'][name][0]
+
+    return db
+
+
+def get_filename():
+    return join(join(searx_dir, "data"), "currencies.json")
+
+
+def main():
+    #
+    db = fetch_db()
+    # static
+    add_currency_name(db, "euro", 'EUR')
+    add_currency_name(db, "euros", 'EUR')
+    add_currency_name(db, "dollar", 'USD')
+    add_currency_name(db, "dollars", 'USD')
+    add_currency_name(db, "peso", 'MXN')
+    add_currency_name(db, "pesos", 'MXN')
+
+    with open(get_filename(), 'w', encoding='utf8') as f:
+        json.dump(db, f, ensure_ascii=False, indent=4)
+
+if __name__ == '__main__':
+    main()
diff --git a/searx_extra/update/update_engine_descriptions.py b/searx_extra/update/update_engine_descriptions.py
new file mode 100755
index 000000000..109fdbfa0
--- /dev/null
+++ b/searx_extra/update/update_engine_descriptions.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+
+import sys
+import json
+from urllib.parse import quote, urlparse
+import detect_language
+from lxml.html import fromstring
+
+from searx.engines.wikidata import send_wikidata_query
+from searx.utils import extract_text
+import searx
+import searx.search
+import searx.poolrequests
+
+SPARQL_WIKIPEDIA_ARTICLE = """
+SELECT DISTINCT ?item ?name
+WHERE {
+  VALUES ?item { %IDS% }
+  ?article schema:about ?item ;
+              schema:inLanguage ?lang ;
+              schema:name ?name ;
+              schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] .
+  FILTER(?lang in (%LANGUAGES_SPARQL%)) .
+  FILTER (!CONTAINS(?name, ':')) .
+}
+"""
+
+SPARQL_DESCRIPTION = """
+SELECT DISTINCT ?item ?itemDescription
+WHERE {
+  VALUES ?item { %IDS% }
+  ?item schema:description ?itemDescription .
+  FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%))
+}
+ORDER BY ?itemLang
+"""
+
+LANGUAGES = searx.settings['locales'].keys()
+LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
+IDS = None
+
+descriptions = {}
+wd_to_engine_name = {}
+
+
+def normalize_description(description):
+    for c in [chr(c) for c in range(0, 31)]:
+        description = description.replace(c, ' ')
+    description = ' '.join(description.strip().split())
+    return description
+
+
+def update_description(engine_name, lang, description, source, replace=True):
+    if replace or lang not in descriptions[engine_name]:
+        descriptions[engine_name][lang] = [normalize_description(description), source]
+
+
+def get_wikipedia_summary(language, pageid):
+    """Return the 'extract' field of the Wikipedia REST summary for pageid, or None on any error."""
+    search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+    url = search_url.format(title=quote(pageid), language=language)
+    try:
+        response = searx.poolrequests.get(url)
+        response.raise_for_status()
+        return json.loads(response.text).get('extract')
+    except Exception:  # best effort, but let KeyboardInterrupt / SystemExit propagate
+        return None
+
+
+def detect_language(text):
+    """Return the language cld3 detects for text, or None unless it is reliable with probability >= 0.98."""
+    import cld3  # imported here: no module-level import binds the name cld3
+    r = cld3.get_language(str(text))  # pylint: disable=E1101
+    return r.language if r is not None and r.probability >= 0.98 and r.is_reliable else None
+
+
+def get_website_description(url, lang1, lang2=None):
+    """Fetch url and return (lang, description) read from its HTML; (None, None) if the request fails."""
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'DNT': '1',
+        'Upgrade-Insecure-Requests': '1',
+        'Sec-GPC': '1',
+        'Cache-Control': 'max-age=0',
+    }
+    if lang1 is not None:
+        lang_list = [lang1]
+        if lang2 is not None:
+            lang_list.append(lang2)
+        headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
+    try:
+        response = searx.poolrequests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+    except Exception:
+        return (None, None)
+
+    try:
+        html = fromstring(response.text)
+    except ValueError:
+        html = fromstring(response.content)
+
+    description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content'))
+    if not description:
+        description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content'))
+    if not description:
+        description = extract_text(html.xpath('/html/head/title'))
+    lang = extract_text(html.xpath('/html/@lang'))
+    if lang is None and lang1:  # lang1 is None when called without a language: len(None) would raise
+        lang = lang1
+    # precedence: language detected from the description, then the page/requested language, then 'en'
+    lang = detect_language(description) or lang or 'en'
+    return (lang.split('_')[0].split('-')[0], description)
+
+
+def initialize():
+    global descriptions, wd_to_engine_name, IDS
+    searx.search.initialize()
+    for engine_name, engine in searx.engines.engines.items():
+        descriptions[engine_name] = {}
+        wikidata_id = getattr(engine, "about", {}).get('wikidata_id')
+        if wikidata_id is not None:
+            wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name)
+
+    IDS = ' '.join(list(map(lambda wd_id: 'wd:' + wd_id, wd_to_engine_name.keys())))
+
+
+def fetch_wikidata_descriptions():
+    global IDS
+    result = send_wikidata_query(SPARQL_DESCRIPTION
+                                 .replace('%IDS%', IDS)
+                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    if result is not None:
+        for binding in result['results']['bindings']:
+            wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+            lang = binding['itemDescription']['xml:lang']
+            description = binding['itemDescription']['value']
+            if ' ' in description:  # skip unique word description (like "website")
+                for engine_name in wd_to_engine_name[wikidata_id]:
+                    update_description(engine_name, lang, description, 'wikidata')
+
+
+def fetch_wikipedia_descriptions():
+    global IDS
+    result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE
+                                 .replace('%IDS%', IDS)
+                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    if result is not None:
+        for binding in result['results']['bindings']:
+            wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+            lang = binding['name']['xml:lang']
+            pageid = binding['name']['value']
+            description = get_wikipedia_summary(lang, pageid)
+            if description is not None and ' ' in description:
+                for engine_name in wd_to_engine_name[wikidata_id]:
+                    update_description(engine_name, lang, description, 'wikipedia')
+
+
+def normalize_url(url):
+    url = url.replace('{language}', 'en')
+    url = urlparse(url)._replace(path='/', params='', query='', fragment='').geturl()
+    url = url.replace('https://api.', 'https://')
+    return url
+
+
+def fetch_website_description(engine_name, website):
+    default_lang, default_description = get_website_description(website, None, None)
+    if default_lang is None or default_description is None:
+        return
+    if default_lang not in descriptions[engine_name]:
+        descriptions[engine_name][default_lang] = [normalize_description(default_description), website]
+    for request_lang in ('en-US', 'es-US', 'fr-FR', 'zh', 'ja', 'ru', 'ar', 'ko'):
+        if request_lang.split('-')[0] not in descriptions[engine_name]:
+            lang, desc = get_website_description(website, request_lang, request_lang.split('-')[0])
+            if desc is not None and desc != default_description:
+                update_description(engine_name, lang, desc, website, replace=False)
+            else:
+                break
+
+
+def fetch_website_descriptions():
+    """Fetch a description for every engine from about['website'], else its search_url or base_url."""
+    for engine_name, engine in searx.engines.engines.items():
+        website = getattr(engine, "about", {}).get('website')
+        for attr in ('search_url', 'base_url'):
+            if website is None and getattr(engine, attr, None):  # attribute may be missing or empty
+                website = normalize_url(getattr(engine, attr))
+        if website is not None:
+            fetch_website_description(engine_name, website)
+
+
+def main():
+    initialize()
+    fetch_wikidata_descriptions()
+    fetch_wikipedia_descriptions()
+    fetch_website_descriptions()
+
+    sys.stdout.write(json.dumps(descriptions, indent=1, separators=(',', ':'), ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/searx_extra/update/update_external_bangs.py b/searx_extra/update/update_external_bangs.py
new file mode 100755
index 000000000..e9dc0ff1d
--- /dev/null
+++ b/searx_extra/update/update_external_bangs.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+"""
+Update searx/data/external_bangs.json using the duckduckgo bangs.
+
+https://duckduckgo.com/newbang loads
+* a javascript which provides the bang version ( https://duckduckgo.com/bv1.js )
+* a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example )
+
+This script loads the javascript, then the bangs.
+
+The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ),
+but then RE_BANG_VERSION will most probably have to be updated as well.
+"""
+# pylint: disable=C0116
+
+import json
+import re
+from os.path import join
+
+import requests
+
+from searx import searx_dir  # pylint: disable=E0401 C0413
+
+
+# from https://duckduckgo.com/newbang
+URL_BV1 = 'https://duckduckgo.com/bv1.js'
+RE_BANG_VERSION = re.compile(r'\/bang\.v([0-9]+)\.js')
+HTTPS_COLON = 'https:'
+HTTP_COLON = 'http:'
+
+
+def get_bang_url():
+    """Return (bangs URL, bang version); the version is read from the javascript at URL_BV1."""
+    response = requests.get(URL_BV1)
+    response.raise_for_status()
+    version = RE_BANG_VERSION.findall(response.text)[0]
+    return f'https://duckduckgo.com/bang.v{version}.js', version
+
+
+def fetch_ddg_bangs(url):
+    response = requests.get(url)  # download the bangs file
+    response.raise_for_status()  # stop on an HTTP error status
+    return json.loads(response.content.decode())  # decode() defaults to UTF-8, then parse the JSON
+
+
+def merge_when_no_leaf(node):
+    """Minimize the number of nodes of the trie, in place.
+
+    A -> B -> C
+    B is a child of A
+    C is a child of B
+
+    If B has no '*' child, then each C is merged into A under the key B + C.
+
+    For example:
+      d -> d -> g -> * (ddg*)
+        -> i -> g -> * (dig*)
+    becomes
+      d -> dg -> *
+        -> ig -> *
+    """
+    restart = False  # set when a child has been merged into node
+    if not isinstance(node, dict):
+        return
+
+    # create a copy of the keys so node can be modified
+    keys = list(node.keys())
+
+    for key in keys:
+        if key == '*':  # '*' holds the value stored at this node, it is not a child
+            continue
+
+        value = node[key]
+        value_keys = list(value.keys())
+        if '*' not in value_keys:
+            for value_key in value_keys:
+                node[key + value_key] = value[value_key]  # the grandchild moves up one level
+                merge_when_no_leaf(node[key + value_key])
+            del node[key]
+            restart = True
+        else:
+            merge_when_no_leaf(value)
+
+    if restart:  # the keys added above were not part of the copied key list: process node again
+        merge_when_no_leaf(node)
+
+
+def optimize_leaf(parent, parent_key, node):
+    """Replace, in place, each ``{'*': value}`` node that has a parent by ``value`` itself."""
+    if not isinstance(node, dict):
+        return
+    if parent is not None and len(node) == 1 and '*' in node:
+        parent[parent_key] = node['*']
+        return
+    for child_key, child in node.items():
+        optimize_leaf(node, child_key, child)
+
+
+def parse_ddg_bangs(ddg_bangs):
+    """Return the trie built from ddg_bangs, an iterable of dicts with the keys 'u', 'r' and 't'."""
+    bang_trie = {}
+    bang_urls = {}
+    for bang_definition in ddg_bangs:
+        # bang URL: '{{{s}}}' is the placeholder, replaced by chr(2) below
+        bang_url = bang_definition['u']
+        if '{{{s}}}' not in bang_url:
+            # ignore invalid bang
+            continue
+
+        bang_url = bang_url.replace('{{{s}}}', chr(2))
+
+        # only for the https protocol: "https://example.com" becomes "//example.com"
+        if bang_url.startswith(HTTPS_COLON + '//'):
+            bang_url = bang_url[len(HTTPS_COLON):]
+
+        # bang definition: the URL and the rank 'r', separated by chr(1)
+        if bang_url.startswith(HTTP_COLON + '//') and bang_url[len(HTTP_COLON):] in bang_urls:
+            # if the bang_url uses the http:// protocol, and the same URL exists in https://
+            # then reuse the https:// bang definition. (written //example.com)
+            bang_def_output = bang_urls[bang_url[len(HTTP_COLON):]]
+        else:
+            # normal use case : new http:// URL or https:// URL (without "https:", see above)
+            bang_rank = str(bang_definition['r'])
+            bang_def_output = bang_url + chr(1) + bang_rank
+            bang_def_output = bang_urls.setdefault(bang_url, bang_def_output)
+
+        bang_urls[bang_url] = bang_def_output
+
+        # bang name
+        bang = bang_definition['t']
+
+        # bang_trie: one level per letter of the bang name, the definition is stored under '*'
+        t = bang_trie
+        for bang_letter in bang:
+            t = t.setdefault(bang_letter, {})
+        t = t.setdefault('*', bang_def_output)
+
+    # optimize the trie
+    merge_when_no_leaf(bang_trie)
+    optimize_leaf(None, None, bang_trie)
+
+    return bang_trie
+
+
+def get_bangs_filename():
+    return join(searx_dir, "data", "external_bangs.json")  # path of the generated file
+
+
+if __name__ == '__main__':
+    bangs_url, bangs_version = get_bang_url()
+    print(f'fetch bangs from {bangs_url}')
+    output = {'version': bangs_version,
+              'trie': parse_ddg_bangs(fetch_ddg_bangs(bangs_url))}
+    # ensure_ascii=False writes non-ASCII characters as-is: set the encoding explicitly,
+    # otherwise the result depends on the locale of the machine running the script
+    with open(get_bangs_filename(), 'w', encoding='utf-8') as fp:
+        json.dump(output, fp, ensure_ascii=False, indent=4)
diff --git a/searx_extra/update/update_firefox_version.py b/searx_extra/update/update_firefox_version.py
new file mode 100755
index 000000000..6acfe76ce
--- /dev/null
+++ b/searx_extra/update/update_firefox_version.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+import json
+import requests
+import re
+from os.path import dirname, join
+from urllib.parse import urlparse, urljoin
+from distutils.version import LooseVersion, StrictVersion
+from lxml import html
+from searx import searx_dir
+
+URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
+RELEASE_PATH = '/pub/firefox/releases/'
+
+NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')  # raw string: "\." is not a valid str escape
+# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
+# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')
+
+# content of useragents.json: "versions" is filled in at the end of this script
+useragents = {
+    "versions": (),
+    "os": ('Windows NT 10.0; WOW64',
+           'X11; Linux x86_64'),
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
+}
+
+
+def fetch_firefox_versions():
+    """Return the release versions (LooseVersion) listed at URL, sorted from newest to oldest."""
+    resp = requests.get(URL, timeout=2.0)
+    if resp.status_code != 200:
+        # status_code is an int: concatenating it directly to a str raises a TypeError
+        raise Exception("Error fetching firefox versions, HTTP code " + str(resp.status_code))
+
+    dom = html.fromstring(resp.text)
+    versions = []
+    for link in dom.xpath('//a/@href'):
+        path = urlparse(urljoin(URL, link)).path
+        if path.startswith(RELEASE_PATH):
+            version = path[len(RELEASE_PATH):-1]  # text after the prefix, minus the last character
+            if NORMAL_REGEX.match(version):
+                versions.append(LooseVersion(version))
+
+    versions.sort(reverse=True)
+    return versions
+
+
+def fetch_firefox_last_versions():
+    """Return the version strings of the latest Firefox releases.
+
+    Only the versions of the most recent major release and of the
+    major release just before it are kept.
+    """
+    versions = fetch_firefox_versions()
+
+    # the first item gives the reference major number (the list is sorted in reverse order)
+    major_last = versions[0].version[0]
+    major_list = (major_last, major_last - 1)
+    return [version.vstring for version in versions if version.version[0] in major_list]
+
+
+def get_useragents_filename():
+    return join(searx_dir, "data", "useragents.json")  # path of the generated file
+
+
+useragents["versions"] = fetch_firefox_last_versions()
+with open(get_useragents_filename(), "w") as useragents_file:  # rewrite data/useragents.json
+    json.dump(useragents, useragents_file, indent=4, ensure_ascii=False)
diff --git a/searx_extra/update/update_languages.py b/searx_extra/update/update_languages.py
new file mode 100755
index 000000000..e63282586
--- /dev/null
+++ b/searx_extra/update/update_languages.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+
+# This script generates languages.py from intersecting each engine's supported languages.
+#
+# Output files: searx/data/engines_languages.json and searx/languages.py
+
+import json
+from pathlib import Path
+from pprint import pformat
+from babel import Locale, UnknownLocaleError
+from babel.languages import get_global
+
+from searx import settings, searx_dir
+from searx.engines import initialize_engines, engines
+
+# Output files.
+engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
+languages_file = Path(searx_dir) / 'languages.py'
+
+
+def fetch_supported_languages():
+    """Fetch the supported languages of each engine and write them to a JSON file.
+
+    Returns the dict ``{engine name: supported languages}`` written to engines_languages_file.
+    """
+    engines_languages = dict()
+    for engine_name in sorted(engines):
+        engine = engines[engine_name]
+        if not hasattr(engine, 'fetch_supported_languages'):
+            continue
+        supported = engine.fetch_supported_languages()
+        print("fetched %s languages from engine %s" % (len(supported), engine_name))
+        # only lists are sorted (presumably to keep the JSON output stable); other types are kept as-is
+        engines_languages[engine_name] = sorted(supported) if isinstance(supported, list) else supported
+
+    # write json file
+    with open(engines_languages_file, 'w', encoding='utf-8') as f:
+        json.dump(engines_languages, f, indent=2, sort_keys=True)
+
+    return engines_languages
+
+
+# Get babel Locale object from lang_code if possible.
+def get_locale(lang_code):
+    """Return the babel Locale parsed from lang_code ('-' separated), or None on a parse failure."""
+    try:
+        return Locale.parse(lang_code, sep='-')
+    except (UnknownLocaleError, ValueError):
+        return None
+
+
+def join_language_lists(engines_languages):
+    """Join all language lists: {short code: {'name', 'english_name', 'counter', 'countries'}}."""
+    language_list = dict()
+    for engine_name in engines_languages:
+        for lang_code in engines_languages[engine_name]:
+
+            # apply custom fixes if necessary
+            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
+                lang_code = next(lc for lc, alias in engines[engine_name].language_aliases.items()
+                                 if lang_code == alias)
+
+            locale = get_locale(lang_code)
+
+            # ensure that lang_code uses standard language and country codes
+            if locale and locale.territory:
+                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
+            short_code = lang_code.split('-')[0]
+
+            # add language without country if not in list
+            if short_code not in language_list:
+                if locale:
+                    # get language's data from babel's Locale object
+                    language_name = locale.get_language_name().title()
+                    english_name = locale.english_name.split(' (')[0]
+                elif short_code in engines_languages.get('wikipedia', {}):
+                    # get language's data from wikipedia if not known by babel
+                    language_name = engines_languages['wikipedia'][short_code]['name']
+                    english_name = engines_languages['wikipedia'][short_code]['english_name']
+                else:
+                    language_name = None
+                    english_name = None
+
+                # add language to list
+                language_list[short_code] = {'name': language_name,
+                                             'english_name': english_name,
+                                             'counter': set(),
+                                             'countries': dict()}
+
+            # add language with country if not in list
+            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
+                country_name = ''
+                if locale:
+                    # get country name from babel's Locale object
+                    country_name = locale.get_territory_name()
+
+                language_list[short_code]['countries'][lang_code] = {'country_name': country_name,
+                                                                     'counter': set()}
+
+            # count engine for both language_country combination and language alone
+            language_list[short_code]['counter'].add(engine_name)
+            if lang_code != short_code:
+                language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)
+
+    return language_list
+
+
+def filter_language_list(all_languages):
+    """Filter language list so it only includes the most supported languages and countries.
+
+    Return a dict: language (or language-country) code -> name, english_name and, if any, country_name.
+    """
+    min_engines_per_lang = 15
+    min_engines_per_country = 10
+    main_engines = [engine_name for engine_name in engines.keys()
+                    if 'general' in engines[engine_name].categories and
+                       engines[engine_name].supported_languages and
+                       not engines[engine_name].disabled]
+
+    # filter list to include only languages supported by most engines or all default general engines
+    filtered_languages = {code: lang for code, lang
+                          in all_languages.items()
+                          if (len(lang['counter']) >= min_engines_per_lang or
+                              all(main_engine in lang['counter']
+                                  for main_engine in main_engines))}
+
+    def _copy_lang_data(lang, country_name=None):
+        new_dict = dict()
+        new_dict['name'] = all_languages[lang]['name']
+        new_dict['english_name'] = all_languages[lang]['english_name']
+        if country_name:
+            new_dict['country_name'] = country_name
+        return new_dict
+
+    # for each language get country codes supported by most engines or at least one country code
+    filtered_languages_with_countries = dict()
+    for lang, lang_data in filtered_languages.items():
+        countries = lang_data['countries']
+        filtered_countries = dict()
+
+        # get language's country codes with enough supported engines
+        for lang_country, country_data in countries.items():
+            if len(country_data['counter']) >= min_engines_per_country:
+                filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])
+
+        # add language without countries too if there's more than one country to choose from
+        if len(filtered_countries) > 1:
+            filtered_countries[lang] = _copy_lang_data(lang)
+        elif len(filtered_countries) == 1:
+            # if there's only one country per language, it's not necessary to show country name
+            lang_country = next(iter(filtered_countries))
+            filtered_countries[lang_country]['country_name'] = None
+
+        # if no country has enough engines try to get most likely country code from babel
+        if not filtered_countries:
+            lang_country = None
+            subtags = get_global('likely_subtags').get(lang)
+            if subtags:
+                country_code = subtags.split('_')[-1]
+                if len(country_code) == 2:
+                    lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
+
+            if lang_country:
+                filtered_countries[lang_country] = _copy_lang_data(lang)
+            else:
+                filtered_countries[lang] = _copy_lang_data(lang)
+
+        filtered_languages_with_countries.update(filtered_countries)
+
+    return filtered_languages_with_countries
+
+
+def write_languages_file(languages):
+    """Write languages.py: the sorted tuple of (code, name, country name, english name)."""
+    file_headers = (
+        "# -*- coding: utf-8 -*-",
+        "# list of language codes",
+        "# this file is generated automatically by searx_extra/update/update_languages.py",
+        "language_codes ="
+    )
+
+    language_codes = tuple([
+        (
+            code,
+            languages[code]['name'].split(' (')[0],
+            languages[code].get('country_name') or '',
+            languages[code].get('english_name') or ''
+        ) for code in sorted(languages)
+    ])
+
+    # the generated file declares "coding: utf-8": write it with that encoding, whatever the locale
+    with open(languages_file, 'w', encoding='utf-8') as new_file:
+        file_content = "{file_headers} \\\n{language_codes}".format(
+            file_headers='\n'.join(file_headers),
+            language_codes=pformat(language_codes, indent=4)
+        )
+        new_file.write(file_content)
+
+
+if __name__ == "__main__":
+    # load the engines, fetch their languages (written to engines_languages_file),
+    # then keep the most supported ones and write them to languages_file
+    initialize_engines(settings['engines'])
+    all_languages = join_language_lists(fetch_supported_languages())
+    write_languages_file(filter_language_list(all_languages))
diff --git a/searx_extra/update/update_translations.sh b/searx_extra/update/update_translations.sh
new file mode 100755
index 000000000..240387ae7
--- /dev/null
+++ b/searx_extra/update/update_translations.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# script to easily update translation language files
+
+# add new language:
+# pybabel init -i messages.pot -d searx/translations -l en
+
+SEARX_DIR='searx'
+
+pybabel extract -F babel.cfg -o messages.pot "$SEARX_DIR"
+for f in "$SEARX_DIR"/translations/*/; do  # glob instead of parsing the output of ls
+    pybabel update -N -i messages.pot -d "$SEARX_DIR"'/translations/' -l "$(basename "$f")"
+done
+
+echo '[!] update done, edit .po files if required and run pybabel compile -d searx/translations/'
diff --git a/searx_extra/update/update_wikidata_units.py b/searx_extra/update/update_wikidata_units.py
new file mode 100755
index 000000000..1e6b8b9ca
--- /dev/null
+++ b/searx_extra/update/update_wikidata_units.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+import json
+import collections
+
+# set path
+from os.path import join
+
+from searx import searx_dir
+from searx.engines.wikidata import send_wikidata_query
+
+
+# the response contains duplicate ?item with different ?symbol
+# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
+# even if a ?item has several ?symbol of the same rank.
+# A deterministic result is presumably wanted to keep wikidata_units.json stable between runs.
+# see:
+# * https://www.wikidata.org/wiki/Help:Ranking
+# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
+# * https://w.wiki/32BT
+#   see the result for https://www.wikidata.org/wiki/Q11582
+#   there are multiple symbols with the same rank
+SARQL_REQUEST = """
+SELECT DISTINCT ?item ?symbol
+WHERE
+{
+  ?item wdt:P31/wdt:P279 wd:Q47574 .
+  ?item p:P5061 ?symbolP .
+  ?symbolP ps:P5061 ?symbol ;
+           wikibase:rank ?rank .
+  FILTER(LANG(?symbol) = "en").
+}
+ORDER BY ?item DESC(?rank) ?symbol
+"""
+
+
+def get_data():
+    """Return an OrderedDict mapping each wikidata entity id to its unit symbol."""
+    results = collections.OrderedDict()
+    response = send_wikidata_query(SARQL_REQUEST)
+    for binding in response['results']['bindings']:
+        # keep only the entity id, for example "Q11582"
+        name = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+        # ignore duplicate: always use the first one
+        results.setdefault(name, binding['symbol']['value'])
+    return results
+
+
+def get_wikidata_units_filename():
+    return join(searx_dir, "data", "wikidata_units.json")  # path of the generated file
+
+
+with open(get_wikidata_units_filename(), 'w', encoding='utf-8') as f:  # symbols are not ASCII-only
+    json.dump(get_data(), f, indent=4, ensure_ascii=False)
diff --git a/setup.py b/setup.py
index 09a3021ee..61227d199 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ setup(
     author='Adam Tauber',
     author_email='asciimoo@gmail.com',
     license='GNU Affero General Public License',
-    packages=find_packages(exclude=["tests*"]),
+    packages=find_packages(exclude=["tests*", "searx_extra*"]),  # the wildcard also excludes searx_extra.update
     zip_safe=False,
     install_requires=requirements,
     extras_require={
diff --git a/tests/unit/test_standalone_searx.py b/tests/unit/test_standalone_searx.py
index 6cc230e6c..a69353c03 100644
--- a/tests/unit/test_standalone_searx.py
+++ b/tests/unit/test_standalone_searx.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 """Test utils/standalone_searx.py"""
 import datetime
-import importlib.util
 import io
 import sys
 
@@ -10,16 +9,7 @@ from nose2.tools import params
 
 from searx.search import SearchQuery, EngineRef, initialize
 from searx.testing import SearxTestCase
-
-
-def get_standalone_searx_module():
-    """Get standalone_searx module."""
-    module_name = 'utils.standalone_searx'
-    filename = 'utils/standalone_searx.py'
-    spec = importlib.util.spec_from_file_location(module_name, filename)
-    sas = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(sas)
-    return sas
+from searx_extra import standalone_searx as sas
 
 
 class StandaloneSearx(SearxTestCase):
@@ -33,7 +23,6 @@ class StandaloneSearx(SearxTestCase):
 
     def test_parse_argument_no_args(self):
         """Test parse argument without args."""
-        sas = get_standalone_searx_module()
         with patch.object(sys, 'argv', ['standalone_searx']), \
                 self.assertRaises(SystemExit):
             sys.stderr = io.StringIO()
@@ -42,7 +31,6 @@ class StandaloneSearx(SearxTestCase):
 
     def test_parse_argument_basic_args(self):
         """Test parse argument with basic args."""
-        sas = get_standalone_searx_module()
         query = 'red box'
         exp_dict = {
             'query': query, 'category': 'general', 'lang': 'all', 'pageno': 1,
@@ -56,7 +44,6 @@ class StandaloneSearx(SearxTestCase):
 
     def test_to_dict(self):
         """test to_dict."""
-        sas = get_standalone_searx_module()
         self.assertEqual(
             sas.to_dict(
                 sas.get_search_query(sas.parse_argument(['red box']))),
@@ -72,7 +59,6 @@ class StandaloneSearx(SearxTestCase):
 
     def test_to_dict_with_mock(self):
         """test to dict."""
-        sas = get_standalone_searx_module()
         with patch.object(sas.searx.search, 'Search') as mock_s:
             m_search = mock_s().search()
             m_sq = Mock()
@@ -97,7 +83,6 @@ class StandaloneSearx(SearxTestCase):
 
     def test_get_search_query(self):
         """test get_search_query."""
-        sas = get_standalone_searx_module()
         args = sas.parse_argument(['rain', ])
         search_q = sas.get_search_query(args)
         self.assertTrue(search_q)
@@ -106,7 +91,6 @@ class StandaloneSearx(SearxTestCase):
 
     def test_no_parsed_url(self):
         """test no_parsed_url func"""
-        sas = get_standalone_searx_module()
         self.assertEqual(
             sas.no_parsed_url([{'parsed_url': 'http://example.com'}]),
             [{}]
@@ -119,11 +103,9 @@ class StandaloneSearx(SearxTestCase):
     )
     def test_json_serial(self, arg, exp_res):
         """test json_serial func"""
-        sas = get_standalone_searx_module()
         self.assertEqual(sas.json_serial(arg), exp_res)
 
     def test_json_serial_error(self):
         """test error on json_serial."""
-        sas = get_standalone_searx_module()
         with self.assertRaises(TypeError):
             sas.json_serial('a')
diff --git a/utils/fetch_ahmia_blacklist.py b/utils/fetch_ahmia_blacklist.py
deleted file mode 100755
index 3e393edbe..000000000
--- a/utils/fetch_ahmia_blacklist.py
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env python
-
-# This script saves Ahmia's blacklist for onion sites.
-# More info in https://ahmia.fi/blacklist/
-
-# set path
-from sys import path
-from os.path import realpath, dirname, join
-path.append(realpath(dirname(realpath(__file__)) + '/../'))
-
-#
-import requests
-from searx import searx_dir
-
-URL = 'https://ahmia.fi/blacklist/banned/'
-
-
-def fetch_ahmia_blacklist():
-    resp = requests.get(URL, timeout=3.0)
-    if resp.status_code != 200:
-        raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
-    else:
-        blacklist = resp.text.split()
-        return blacklist
-
-
-def get_ahmia_blacklist_filename():
-    return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
-
-
-blacklist = fetch_ahmia_blacklist()
-with open(get_ahmia_blacklist_filename(), "w") as f:
-    f.write('\n'.join(blacklist))
diff --git a/utils/fetch_currencies.py b/utils/fetch_currencies.py
deleted file mode 100644
index 8811049a5..000000000
--- a/utils/fetch_currencies.py
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/usr/bin/env python
-
-import re
-import unicodedata
-import json
-
-# set path
-from sys import path
-from os.path import realpath, dirname, join
-path.append(realpath(dirname(realpath(__file__)) + '/../'))
-
-from searx import searx_dir, settings
-from searx.engines.wikidata import send_wikidata_query
-
-
-# ORDER BY (with all the query fields) is important to keep a deterministic result order
-# so multiple invokation of this script doesn't change currencies.json
-SARQL_REQUEST = """
-SELECT DISTINCT ?iso4217 ?unit ?unicode ?label ?alias WHERE {
-  ?item wdt:P498 ?iso4217; rdfs:label ?label.
-  OPTIONAL { ?item skos:altLabel ?alias FILTER (LANG (?alias) = LANG(?label)). }
-  OPTIONAL { ?item wdt:P5061 ?unit. }
-  OPTIONAL { ?item wdt:P489 ?symbol.
-             ?symbol wdt:P487 ?unicode. }
-  MINUS { ?item wdt:P582 ?end_data . }                  # Ignore monney with an end date
-  MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . }      # Ignore "former entity" (obsolete currency)
-  FILTER(LANG(?label) IN (%LANGUAGES_SPARQL%)).
-}
-ORDER BY ?iso4217 ?unit ?unicode ?label ?alias
-"""
-
-# ORDER BY (with all the query fields) is important to keep a deterministic result order
-# so multiple invokation of this script doesn't change currencies.json
-SPARQL_WIKIPEDIA_NAMES_REQUEST = """
-SELECT DISTINCT ?iso4217 ?article_name WHERE {
-  ?item wdt:P498 ?iso4217 .
-  ?article schema:about ?item ;
-           schema:name ?article_name ;
-           schema:isPartOf [ wikibase:wikiGroup "wikipedia" ]
-  MINUS { ?item wdt:P582 ?end_data . }                  # Ignore monney with an end date
-  MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . }      # Ignore "former entity" (obsolete currency)
-  FILTER(LANG(?article_name) IN (%LANGUAGES_SPARQL%)).
-}
-ORDER BY ?iso4217 ?article_name
-"""
-
-
-LANGUAGES = settings['locales'].keys()
-LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
-
-
-def remove_accents(name):
-    return unicodedata.normalize('NFKD', name).lower()
-
-
-def remove_extra(name):
-    for c in ('(', ':'):
-        if c in name:
-            name = name.split(c)[0].strip()
-    return name
-
-
-def _normalize_name(name):
-    name = re.sub(' +', ' ', remove_accents(name.lower()).replace('-', ' '))
-    name = remove_extra(name)
-    return name
-
-
-def add_currency_name(db, name, iso4217, normalize_name=True):
-    db_names = db['names']
-
-    if normalize_name:
-        name = _normalize_name(name)
-
-    iso4217_set = db_names.setdefault(name, [])
-    if iso4217 not in iso4217_set:
-        iso4217_set.insert(0, iso4217)
-
-
-def add_currency_label(db, label, iso4217, language):
-    labels = db['iso4217'].setdefault(iso4217, {})
-    labels[language] = label
-
-
-def wikidata_request_result_iterator(request):
-    result = send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
-    if result is not None:
-        for r in result['results']['bindings']:
-            yield r
-
-
-def fetch_db():
-    db = {
-        'names': {},
-        'iso4217': {},
-    }
-
-    for r in wikidata_request_result_iterator(SPARQL_WIKIPEDIA_NAMES_REQUEST):
-        iso4217 = r['iso4217']['value']
-        article_name = r['article_name']['value']
-        article_lang = r['article_name']['xml:lang']
-        add_currency_name(db, article_name, iso4217)
-        add_currency_label(db, article_name, iso4217, article_lang)
-
-    for r in wikidata_request_result_iterator(SARQL_REQUEST):
-        iso4217 = r['iso4217']['value']
-        if 'label' in r:
-            label = r['label']['value']
-            label_lang = r['label']['xml:lang']
-            add_currency_name(db, label, iso4217)
-            add_currency_label(db, label, iso4217, label_lang)
-
-        if 'alias' in r:
-            add_currency_name(db, r['alias']['value'], iso4217)
-
-        if 'unicode' in r:
-            add_currency_name(db, r['unicode']['value'], iso4217, normalize_name=False)
-
-        if 'unit' in r:
-            add_currency_name(db, r['unit']['value'], iso4217, normalize_name=False)
-
-    # reduce memory usage:
-    # replace lists with one item by the item.
-    # see searx.search.processors.online_currency.name_to_iso4217
-    for name in db['names']:
-        if len(db['names'][name]) == 1:
-            db['names'][name] = db['names'][name][0]
-
-    return db
-
-
-def get_filename():
-    return join(join(searx_dir, "data"), "currencies.json")
-
-
-def main():
-    #
-    db = fetch_db()
-    # static
-    add_currency_name(db, "euro", 'EUR')
-    add_currency_name(db, "euros", 'EUR')
-    add_currency_name(db, "dollar", 'USD')
-    add_currency_name(db, "dollars", 'USD')
-    add_currency_name(db, "peso", 'MXN')
-    add_currency_name(db, "pesos", 'MXN')
-
-    with open(get_filename(), 'w', encoding='utf8') as f:
-        json.dump(db, f, ensure_ascii=False, indent=4)
-
-if __name__ == '__main__':
-    main()
diff --git a/utils/fetch_engine_descriptions.py b/utils/fetch_engine_descriptions.py
deleted file mode 100644
index 9ca001d45..000000000
--- a/utils/fetch_engine_descriptions.py
+++ /dev/null
@@ -1,206 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import json
-from urllib.parse import quote, urlparse
-from os.path import realpath, dirname
-import cld3
-from lxml.html import fromstring
-
-# set path
-sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
-
-from searx.engines.wikidata import send_wikidata_query
-from searx.utils import extract_text
-import searx
-import searx.search
-import searx.poolrequests
-
-SPARQL_WIKIPEDIA_ARTICLE = """
-SELECT DISTINCT ?item ?name
-WHERE {
-  VALUES ?item { %IDS% }
-  ?article schema:about ?item ;
-              schema:inLanguage ?lang ;
-              schema:name ?name ;
-              schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] .
-  FILTER(?lang in (%LANGUAGES_SPARQL%)) .
-  FILTER (!CONTAINS(?name, ':')) .
-}
-"""
-
-SPARQL_DESCRIPTION = """
-SELECT DISTINCT ?item ?itemDescription
-WHERE {
-  VALUES ?item { %IDS% }
-  ?item schema:description ?itemDescription .
-  FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%))
-}
-ORDER BY ?itemLang
-"""
-
-LANGUAGES = searx.settings['locales'].keys()
-LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
-IDS = None
-
-descriptions = {}
-wd_to_engine_name = {}
-
-
-def normalize_description(description):
-    for c in [chr(c) for c in range(0, 31)]:
-        description = description.replace(c, ' ')
-    description = ' '.join(description.strip().split())
-    return description
-
-
-def update_description(engine_name, lang, description, source, replace=True):
-    if replace or lang not in descriptions[engine_name]:
-        descriptions[engine_name][lang] = [normalize_description(description), source]
-
-
-def get_wikipedia_summary(language, pageid):
-    search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
-    url = search_url.format(title=quote(pageid), language=language)
-    try:
-        response = searx.poolrequests.get(url)
-        response.raise_for_status()
-        api_result = json.loads(response.text)
-        return api_result.get('extract')
-    except:
-        return None
-
-
-def detect_language(text):
-    r = cld3.get_language(str(text))  # pylint: disable=E1101
-    if r is not None and r.probability >= 0.98 and r.is_reliable:
-        return r.language
-    return None
-
-
-def get_website_description(url, lang1, lang2=None):
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-        'DNT': '1',
-        'Upgrade-Insecure-Requests': '1',
-        'Sec-GPC': '1',
-        'Cache-Control': 'max-age=0',
-    }
-    if lang1 is not None:
-        lang_list = [lang1]
-        if lang2 is not None:
-            lang_list.append(lang2)
-        headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
-    try:
-        response = searx.poolrequests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-    except Exception:
-        return (None, None)
-
-    try:
-        html = fromstring(response.text)
-    except ValueError:
-        html = fromstring(response.content)
-
-    description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content'))
-    if not description:
-        description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content'))
-    if not description:
-        description = extract_text(html.xpath('/html/head/title'))
-    lang = extract_text(html.xpath('/html/@lang'))
-    if lang is None and len(lang1) > 0:
-        lang = lang1
-    lang = detect_language(description) or lang or 'en'
-    lang = lang.split('_')[0]
-    lang = lang.split('-')[0]
-    return (lang, description)
-
-
-def initialize():
-    global descriptions, wd_to_engine_name, IDS
-    searx.search.initialize()
-    for engine_name, engine in searx.engines.engines.items():
-        descriptions[engine_name] = {}
-        wikidata_id = getattr(engine, "about", {}).get('wikidata_id')
-        if wikidata_id is not None:
-            wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name)
-
-    IDS = ' '.join(list(map(lambda wd_id: 'wd:' + wd_id, wd_to_engine_name.keys())))
-
-
-def fetch_wikidata_descriptions():
-    global IDS
-    result = send_wikidata_query(SPARQL_DESCRIPTION
-                                 .replace('%IDS%', IDS)
-                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
-    if result is not None:
-        for binding in result['results']['bindings']:
-            wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
-            lang = binding['itemDescription']['xml:lang']
-            description = binding['itemDescription']['value']
-            if ' ' in description:  # skip unique word description (like "website")
-                for engine_name in wd_to_engine_name[wikidata_id]:
-                    update_description(engine_name, lang, description, 'wikidata')
-
-
-def fetch_wikipedia_descriptions():
-    global IDS
-    result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE
-                                 .replace('%IDS%', IDS)
-                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
-    if result is not None:
-        for binding in result['results']['bindings']:
-            wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
-            lang = binding['name']['xml:lang']
-            pageid = binding['name']['value']
-            description = get_wikipedia_summary(lang, pageid)
-            if description is not None and ' ' in description:
-                for engine_name in wd_to_engine_name[wikidata_id]:
-                    update_description(engine_name, lang, description, 'wikipedia')
-
-
-def normalize_url(url):
-    url = url.replace('{language}', 'en')
-    url = urlparse(url)._replace(path='/', params='', query='', fragment='').geturl()
-    url = url.replace('https://api.', 'https://')
-    return url
-
-
-def fetch_website_description(engine_name, website):
-    default_lang, default_description = get_website_description(website, None, None)
-    if default_lang is None or default_description is None:
-        return
-    if default_lang not in descriptions[engine_name]:
-        descriptions[engine_name][default_lang] = [normalize_description(default_description), website]
-    for request_lang in ('en-US', 'es-US', 'fr-FR', 'zh', 'ja', 'ru', 'ar', 'ko'):
-        if request_lang.split('-')[0] not in descriptions[engine_name]:
-            lang, desc = get_website_description(website, request_lang, request_lang.split('-')[0])
-            if desc is not None and desc != default_description:
-                update_description(engine_name, lang, desc, website, replace=False)
-            else:
-                break
-
-
-def fetch_website_descriptions():
-    for engine_name, engine in searx.engines.engines.items():
-        website = getattr(engine, "about", {}).get('website')
-        if website is None:
-            website = normalize_url(getattr(engine, "search_url"))
-        if website is None:
-            website = normalize_url(getattr(engine, "base_url"))
-        if website is not None:
-            fetch_website_description(engine_name, website)
-
-
-def main():
-    initialize()
-    fetch_wikidata_descriptions()
-    fetch_wikipedia_descriptions()
-    fetch_website_descriptions()
-
-    sys.stdout.write(json.dumps(descriptions, indent=1, separators=(',', ':'), ensure_ascii=False))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/utils/fetch_external_bangs.py b/utils/fetch_external_bangs.py
deleted file mode 100755
index ba6f51e7a..000000000
--- a/utils/fetch_external_bangs.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/env python
-"""
-Update searx/data/external_bangs.json using the duckduckgo bangs.
-
-https://duckduckgo.com/newbang loads
-* a javascript which provides the bang version ( https://duckduckgo.com/bv1.js )
-* a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example )
-
-This script loads the javascript, then the bangs.
-
-The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ),
-but most probably it will requires to update RE_BANG_VERSION
-"""
-# pylint: disable=C0116
-
-import sys
-import json
-import re
-from os.path import realpath, dirname, join
-
-import requests
-
-# set path
-sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
-
-from searx import searx_dir  # pylint: disable=E0401 C0413
-
-
-# from https://duckduckgo.com/newbang
-URL_BV1 = 'https://duckduckgo.com/bv1.js'
-RE_BANG_VERSION = re.compile(r'\/bang\.v([0-9]+)\.js')
-HTTPS_COLON = 'https:'
-HTTP_COLON = 'http:'
-
-
-def get_bang_url():
-    response = requests.get(URL_BV1)
-    response.raise_for_status()
-
-    r = RE_BANG_VERSION.findall(response.text)
-    return f'https://duckduckgo.com/bang.v{r[0]}.js', r[0]
-
-
-def fetch_ddg_bangs(url):
-    response = requests.get(url)
-    response.raise_for_status()
-    return json.loads(response.content.decode())
-
-
-def merge_when_no_leaf(node):
-    """Minimize the number of nodes
-
-    A -> B -> C
-    B is child of A
-    C is child of B
-
-    If there are no C equals to '*', then each C are merged into A
-
-    For example:
-      d -> d -> g -> * (ddg*)
-        -> i -> g -> * (dig*)
-    becomes
-      d -> dg -> *
-        -> ig -> *
-    """
-    restart = False
-    if not isinstance(node, dict):
-        return
-
-    # create a copy of the keys so node can be modified
-    keys = list(node.keys())
-
-    for key in keys:
-        if key == '*':
-            continue
-
-        value = node[key]
-        value_keys = list(value.keys())
-        if '*' not in value_keys:
-            for value_key in value_keys:
-                node[key + value_key] = value[value_key]
-                merge_when_no_leaf(node[key + value_key])
-            del node[key]
-            restart = True
-        else:
-            merge_when_no_leaf(value)
-
-    if restart:
-        merge_when_no_leaf(node)
-
-
-def optimize_leaf(parent, parent_key, node):
-    if not isinstance(node, dict):
-        return
-
-    if len(node) == 1 and '*' in node and parent is not None:
-        parent[parent_key] = node['*']
-    else:
-        for key, value in node.items():
-            optimize_leaf(node, key, value)
-
-
-def parse_ddg_bangs(ddg_bangs):
-    bang_trie = {}
-    bang_urls = {}
-
-    for bang_definition in ddg_bangs:
-        # bang_list
-        bang_url = bang_definition['u']
-        if '{{{s}}}' not in bang_url:
-            # ignore invalid bang
-            continue
-
-        bang_url = bang_url.replace('{{{s}}}', chr(2))
-
-        # only for the https protocol: "https://example.com" becomes "//example.com"
-        if bang_url.startswith(HTTPS_COLON + '//'):
-            bang_url = bang_url[len(HTTPS_COLON):]
-
-        #
-        if bang_url.startswith(HTTP_COLON + '//') and bang_url[len(HTTP_COLON):] in bang_urls:
-            # if the bang_url uses the http:// protocol, and the same URL exists in https://
-            # then reuse the https:// bang definition. (written //example.com)
-            bang_def_output = bang_urls[bang_url[len(HTTP_COLON):]]
-        else:
-            # normal use case : new http:// URL or https:// URL (without "https:", see above)
-            bang_rank = str(bang_definition['r'])
-            bang_def_output = bang_url + chr(1) + bang_rank
-            bang_def_output = bang_urls.setdefault(bang_url, bang_def_output)
-
-        bang_urls[bang_url] = bang_def_output
-
-        # bang name
-        bang = bang_definition['t']
-
-        # bang_trie
-        t = bang_trie
-        for bang_letter in bang:
-            t = t.setdefault(bang_letter, {})
-        t = t.setdefault('*', bang_def_output)
-
-    # optimize the trie
-    merge_when_no_leaf(bang_trie)
-    optimize_leaf(None, None, bang_trie)
-
-    return bang_trie
-
-
-def get_bangs_filename():
-    return join(join(searx_dir, "data"), "external_bangs.json")
-
-
-if __name__ == '__main__':
-    bangs_url, bangs_version = get_bang_url()
-    print(f'fetch bangs from {bangs_url}')
-    output = {
-        'version': bangs_version,
-        'trie': parse_ddg_bangs(fetch_ddg_bangs(bangs_url))
-    }
-    with open(get_bangs_filename(), 'w') as fp:
-        json.dump(output, fp, ensure_ascii=False, indent=4)
diff --git a/utils/fetch_firefox_version.py b/utils/fetch_firefox_version.py
deleted file mode 100755
index 997a752b3..000000000
--- a/utils/fetch_firefox_version.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-
-# set path
-from sys import path
-from os.path import realpath, dirname, join
-path.append(realpath(dirname(realpath(__file__)) + '/../'))
-
-#
-import json
-import requests
-import re
-from urllib.parse import urlparse, urljoin
-from distutils.version import LooseVersion, StrictVersion
-from lxml import html
-from searx import searx_dir
-
-URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
-RELEASE_PATH = '/pub/firefox/releases/'
-
-NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$')
-# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
-# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')
-
-# 
-useragents = {
-    "versions": (),
-    "os": ('Windows NT 10.0; WOW64',
-           'X11; Linux x86_64'),
-    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
-}
-
-
-def fetch_firefox_versions():
-    resp = requests.get(URL, timeout=2.0)
-    if resp.status_code != 200:
-        raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
-    else:
-        dom = html.fromstring(resp.text)
-        versions = []
-
-        for link in dom.xpath('//a/@href'):
-            url = urlparse(urljoin(URL, link))
-            path = url.path
-            if path.startswith(RELEASE_PATH):
-                version = path[len(RELEASE_PATH):-1]
-                if NORMAL_REGEX.match(version):
-                    versions.append(LooseVersion(version))
-
-        list.sort(versions, reverse=True)
-        return versions
-
-
-def fetch_firefox_last_versions():
-    versions = fetch_firefox_versions()
-
-    result = []
-    major_last = versions[0].version[0]
-    major_list = (major_last, major_last - 1)
-    for version in versions:
-        major_current = version.version[0]
-        if major_current in major_list:
-            result.append(version.vstring)
-
-    return result
-
-
-def get_useragents_filename():
-    return join(join(searx_dir, "data"), "useragents.json")
-
-
-useragents["versions"] = fetch_firefox_last_versions()
-with open(get_useragents_filename(), "w") as f:
-    json.dump(useragents, f, indent=4, ensure_ascii=False)
diff --git a/utils/fetch_languages.py b/utils/fetch_languages.py
deleted file mode 100644
index 582e0ae00..000000000
--- a/utils/fetch_languages.py
+++ /dev/null
@@ -1,207 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This script generates languages.py from intersecting each engine's supported languages.
-#
-# Output files: searx/data/engines_languages.json and searx/languages.py
-
-import json
-from pathlib import Path
-from pprint import pformat
-from sys import path
-from babel import Locale, UnknownLocaleError
-from babel.languages import get_global
-
-path.append('../searx')  # noqa
-from searx import settings, searx_dir
-from searx.engines import initialize_engines, engines
-
-# Output files.
-engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
-languages_file = Path(searx_dir) / 'languages.py'
-
-
-# Fetchs supported languages for each engine and writes json file with those.
-def fetch_supported_languages():
-
-    engines_languages = dict()
-    names = list(engines)
-    names.sort()
-
-    for engine_name in names:
-        if hasattr(engines[engine_name], 'fetch_supported_languages'):
-            engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
-            print("fetched %s languages from engine %s" % (
-                len(engines_languages[engine_name]), engine_name))
-            if type(engines_languages[engine_name]) == list:
-                engines_languages[engine_name] = sorted(engines_languages[engine_name])
-
-    # write json file
-    with open(engines_languages_file, 'w', encoding='utf-8') as f:
-        json.dump(engines_languages, f, indent=2, sort_keys=True)
-
-    return engines_languages
-
-
-# Get babel Locale object from lang_code if possible.
-def get_locale(lang_code):
-    try:
-        locale = Locale.parse(lang_code, sep='-')
-        return locale
-    except (UnknownLocaleError, ValueError):
-        return None
-
-
-# Join all language lists.
-def join_language_lists(engines_languages):
-    language_list = dict()
-    for engine_name in engines_languages:
-        for lang_code in engines_languages[engine_name]:
-
-            # apply custom fixes if necessary
-            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
-                lang_code = next(lc for lc, alias in engines[engine_name].language_aliases.items()
-                                 if lang_code == alias)
-
-            locale = get_locale(lang_code)
-
-            # ensure that lang_code uses standard language and country codes
-            if locale and locale.territory:
-                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
-            short_code = lang_code.split('-')[0]
-
-            # add language without country if not in list
-            if short_code not in language_list:
-                if locale:
-                    # get language's data from babel's Locale object
-                    language_name = locale.get_language_name().title()
-                    english_name = locale.english_name.split(' (')[0]
-                elif short_code in engines_languages['wikipedia']:
-                    # get language's data from wikipedia if not known by babel
-                    language_name = engines_languages['wikipedia'][short_code]['name']
-                    english_name = engines_languages['wikipedia'][short_code]['english_name']
-                else:
-                    language_name = None
-                    english_name = None
-
-                # add language to list
-                language_list[short_code] = {'name': language_name,
-                                             'english_name': english_name,
-                                             'counter': set(),
-                                             'countries': dict()}
-
-            # add language with country if not in list
-            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
-                country_name = ''
-                if locale:
-                    # get country name from babel's Locale object
-                    country_name = locale.get_territory_name()
-
-                language_list[short_code]['countries'][lang_code] = {'country_name': country_name,
-                                                                     'counter': set()}
-
-            # count engine for both language_country combination and language alone
-            language_list[short_code]['counter'].add(engine_name)
-            if lang_code != short_code:
-                language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)
-
-    return language_list
-
-
-# Filter language list so it only includes the most supported languages and countries
-def filter_language_list(all_languages):
-    min_engines_per_lang = 15
-    min_engines_per_country = 10
-    main_engines = [engine_name for engine_name in engines.keys()
-                    if 'general' in engines[engine_name].categories and
-                       engines[engine_name].supported_languages and
-                       not engines[engine_name].disabled]
-
-    # filter list to include only languages supported by most engines or all default general engines
-    filtered_languages = {code: lang for code, lang
-                          in all_languages.items()
-                          if (len(lang['counter']) >= min_engines_per_lang or
-                              all(main_engine in lang['counter']
-                                  for main_engine in main_engines))}
-
-    def _copy_lang_data(lang, country_name=None):
-        new_dict = dict()
-        new_dict['name'] = all_languages[lang]['name']
-        new_dict['english_name'] = all_languages[lang]['english_name']
-        if country_name:
-            new_dict['country_name'] = country_name
-        return new_dict
-
-    def _country_count(i):
-        return len(countries[sorted_countries[i]]['counter'])
-
-    # for each language get country codes supported by most engines or at least one country code
-    filtered_languages_with_countries = dict()
-    for lang, lang_data in filtered_languages.items():
-        countries = lang_data['countries']
-        filtered_countries = dict()
-
-        # get language's country codes with enough supported engines
-        for lang_country, country_data in countries.items():
-            if len(country_data['counter']) >= min_engines_per_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])
-
-        # add language without countries too if there's more than one country to choose from
-        if len(filtered_countries) > 1:
-            filtered_countries[lang] = _copy_lang_data(lang)
-        elif len(filtered_countries) == 1:
-            # if there's only one country per language, it's not necessary to show country name
-            lang_country = next(iter(filtered_countries))
-            filtered_countries[lang_country]['country_name'] = None
-
-        # if no country has enough engines try to get most likely country code from babel
-        if not filtered_countries:
-            lang_country = None
-            subtags = get_global('likely_subtags').get(lang)
-            if subtags:
-                country_code = subtags.split('_')[-1]
-                if len(country_code) == 2:
-                    lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
-
-            if lang_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang)
-            else:
-                filtered_countries[lang] = _copy_lang_data(lang)
-
-        filtered_languages_with_countries.update(filtered_countries)
-
-    return filtered_languages_with_countries
-
-
-# Write languages.py.
-def write_languages_file(languages):
-    file_headers = (
-        "# -*- coding: utf-8 -*-",
-        "# list of language codes",
-        "# this file is generated automatically by utils/fetch_languages.py",
-        "language_codes ="
-    )
-
-    language_codes = tuple([
-        (
-            code,
-            languages[code]['name'].split(' (')[0],
-            languages[code].get('country_name') or '',
-            languages[code].get('english_name') or ''
-        ) for code in sorted(languages)
-    ])
-
-    with open(languages_file, 'w') as new_file:
-        file_content = "{file_headers} \\\n{language_codes}".format(
-            file_headers='\n'.join(file_headers),
-            language_codes=pformat(language_codes, indent=4)
-        )
-        new_file.write(file_content)
-        new_file.close()
-
-
-if __name__ == "__main__":
-    initialize_engines(settings['engines'])
-    engines_languages = fetch_supported_languages()
-    all_languages = join_language_lists(engines_languages)
-    filtered_languages = filter_language_list(all_languages)
-    write_languages_file(filtered_languages)
diff --git a/utils/fetch_wikidata_units.py b/utils/fetch_wikidata_units.py
deleted file mode 100644
index 69ae8ab27..000000000
--- a/utils/fetch_wikidata_units.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/env python
-
-import json
-import collections
-
-# set path
-from sys import path
-from os.path import realpath, dirname, join
-path.append(realpath(dirname(realpath(__file__)) + '/../'))
-
-from searx import searx_dir
-from searx.engines.wikidata import send_wikidata_query
-
-
-# the response contains duplicate ?item with the different ?symbol
-# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
-# even if a ?item has different ?symbol of the same rank.
-# A deterministic result 
-# see:
-# * https://www.wikidata.org/wiki/Help:Ranking
-# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
-# * https://w.wiki/32BT
-#   see the result for https://www.wikidata.org/wiki/Q11582
-#   there are multiple symbols the same rank
-SARQL_REQUEST = """
-SELECT DISTINCT ?item ?symbol
-WHERE
-{
-  ?item wdt:P31/wdt:P279 wd:Q47574 .
-  ?item p:P5061 ?symbolP .
-  ?symbolP ps:P5061 ?symbol ;
-           wikibase:rank ?rank .
-  FILTER(LANG(?symbol) = "en").
-}
-ORDER BY ?item DESC(?rank) ?symbol
-"""
-
-
-def get_data():
-    results = collections.OrderedDict()
-    response = send_wikidata_query(SARQL_REQUEST)
-    for unit in response['results']['bindings']:
-        name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '')
-        unit = unit['symbol']['value']
-        if name not in results:
-            # ignore duplicate: always use the first one
-            results[name] = unit
-    return results
-
-
-def get_wikidata_units_filename():
-    return join(join(searx_dir, "data"), "wikidata_units.json")
-
-
-with open(get_wikidata_units_filename(), 'w') as f:
-    json.dump(get_data(), f, indent=4, ensure_ascii=False)
diff --git a/utils/google_search.py b/utils/google_search.py
deleted file mode 100644
index cad32eeca..000000000
--- a/utils/google_search.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from sys import argv, exit
-
-if not len(argv) > 1:
-    print('search query required')
-    exit(1)
-
-import requests
-from json import dumps
-from searx.engines import google
-from searx.search import default_request_params
-
-request_params = default_request_params()
-# Possible params
-# request_params['headers']['User-Agent'] = ''
-# request_params['category'] = ''
-request_params['pageno'] = 1
-request_params['language'] = 'en_us'
-request_params['time_range'] = ''
-
-params = google.request(argv[1], request_params)
-
-request_args = dict(
-    headers=request_params['headers'],
-    cookies=request_params['cookies'],
-)
-
-if request_params['method'] == 'GET':
-    req = requests.get
-else:
-    req = requests.post
-    request_args['data'] = request_params['data']
-
-resp = req(request_params['url'], **request_args)
-resp.search_params = request_params
-print(dumps(google.response(resp)))
diff --git a/utils/standalone_searx.py b/utils/standalone_searx.py
deleted file mode 100755
index 89023f41b..000000000
--- a/utils/standalone_searx.py
+++ /dev/null
@@ -1,217 +0,0 @@
-#!/usr/bin/env python
-"""Script to run searx from terminal.
-
-Getting categories without initiate the engine will only return `['general']`
-
->>> import searx.engines
-... list(searx.engines.categories.keys())
-['general']
->>> import searx.search
-... searx.search.initialize()
-... list(searx.engines.categories.keys())
-['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
-
-Example to use this script:
-
-.. code::  bash
-
-    $ python3 utils/standalone_searx.py rain
-
-Example to run it from python:
-
->>> import importlib
-... import json
-... import sys
-... import searx.engines
-... import searx.search
-... search_query = 'rain'
-... # initialize engines
-... searx.search.initialize()
-... # load engines categories once instead of each time the function called
-... engine_cs = list(searx.engines.categories.keys())
-... # load module
-... spec = importlib.util.spec_from_file_location(
-...     'utils.standalone_searx', 'utils/standalone_searx.py')
-... sas = importlib.util.module_from_spec(spec)
-... spec.loader.exec_module(sas)
-... # use function from module
-... prog_args = sas.parse_argument([search_query], category_choices=engine_cs)
-... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs)
-... res_dict = sas.to_dict(search_q)
-... sys.stdout.write(json.dumps(
-...     res_dict, sort_keys=True, indent=4, ensure_ascii=False,
-...     default=sas.json_serial))
-{
-    "answers": [],
-    "infoboxes": [ {...} ],
-    "paging": true,
-    "results": [... ],
-    "results_number": 820000000.0,
-    "search": {
-        "lang": "all",
-        "pageno": 1,
-        "q": "rain",
-        "safesearch": 0,
-        "timerange": null
-    },
-    "suggestions": [...]
-}
-"""  # noqa: E501
-# pylint: disable=pointless-string-statement
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-
-(C) 2016- by Alexandre Flament, <alex@al-f.net>
-'''
-# pylint: disable=wrong-import-position
-import argparse
-import sys
-from datetime import datetime
-from json import dumps
-from typing import Any, Dict, List, Optional
-
-import searx
-import searx.preferences
-import searx.query
-import searx.search
-import searx.webadapter
-
-EngineCategoriesVar = Optional[List[str]]
-
-
-def get_search_query(
-        args: argparse.Namespace, engine_categories: EngineCategoriesVar = None
-) -> searx.search.SearchQuery:
-    """Get  search results for the query"""
-    if engine_categories is None:
-        engine_categories = list(searx.engines.categories.keys())
-    try:
-        category = args.category.decode('utf-8')
-    except AttributeError:
-        category = args.category
-    form = {
-        "q": args.query,
-        "categories": category,
-        "pageno": str(args.pageno),
-        "language": args.lang,
-        "time_range": args.timerange
-    }
-    preferences = searx.preferences.Preferences(
-        ['oscar'], engine_categories, searx.engines.engines, [])
-    preferences.key_value_settings['safesearch'].parse(args.safesearch)
-
-    search_query = searx.webadapter.get_search_query_from_webapp(
-        preferences, form)[0]
-    return search_query
-
-
-def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Remove parsed url from dict."""
-    for result in results:
-        del result['parsed_url']
-    return results
-
-
-def json_serial(obj: Any) -> Any:
-    """JSON serializer for objects not serializable by default json code.
-
-    :raise TypeError: raised when **obj** is not serializable
-    """
-    if isinstance(obj, datetime):
-        serial = obj.isoformat()
-        return serial
-    if isinstance(obj, bytes):
-        return obj.decode('utf8')
-    if isinstance(obj, set):
-        return list(obj)
-    raise TypeError("Type ({}) not serializable".format(type(obj)))
-
-
-def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
-    """Get result from parsed arguments."""
-    result_container = searx.search.Search(search_query).search()
-    result_container_json = {
-        "search": {
-            "q": search_query.query,
-            "pageno": search_query.pageno,
-            "lang": search_query.lang,
-            "safesearch": search_query.safesearch,
-            "timerange": search_query.time_range,
-        },
-        "results": no_parsed_url(result_container.get_ordered_results()),
-        "infoboxes": result_container.infoboxes,
-        "suggestions": list(result_container.suggestions),
-        "answers": list(result_container.answers),
-        "paging": result_container.paging,
-        "results_number": result_container.results_number()
-    }
-    return result_container_json
-
-
-def parse_argument(
-        args: Optional[List[str]]=None,
-        category_choices: EngineCategoriesVar=None
-) -> argparse.Namespace:
-    """Parse command line.
-
-    :raise SystemExit: Query argument required on `args`
-
-    Examples:
-
-    >>> import importlib
-    ... # load module
-    ... spec = importlib.util.spec_from_file_location(
-    ...     'utils.standalone_searx', 'utils/standalone_searx.py')
-    ... sas = importlib.util.module_from_spec(spec)
-    ... spec.loader.exec_module(sas)
-    ... sas.parse_argument()
-    usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]]
-                     query
-    SystemExit: 2
-    >>> sas.parse_argument(['rain'])
-    Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None)
-    """  # noqa: E501
-    if not category_choices:
-        category_choices = list(searx.engines.categories.keys())
-    parser = argparse.ArgumentParser(description='Standalone searx.')
-    parser.add_argument('query', type=str,
-                        help='Text query')
-    parser.add_argument('--category', type=str, nargs='?',
-                        choices=category_choices,
-                        default='general',
-                        help='Search category')
-    parser.add_argument('--lang', type=str, nargs='?', default='all',
-                        help='Search language')
-    parser.add_argument('--pageno', type=int, nargs='?', default=1,
-                        help='Page number starting from 1')
-    parser.add_argument(
-        '--safesearch', type=str, nargs='?',
-        choices=['0', '1', '2'], default='0',
-        help='Safe content filter from none to strict')
-    parser.add_argument(
-        '--timerange', type=str,
-        nargs='?', choices=['day', 'week', 'month', 'year'],
-        help='Filter by time range')
-    return parser.parse_args(args)
-
-
-if __name__ == '__main__':
-    searx.search.initialize()
-    engine_cs = list(searx.engines.categories.keys())
-    prog_args = parse_argument(category_choices=engine_cs)
-    search_q = get_search_query(prog_args, engine_categories=engine_cs)
-    res_dict = to_dict(search_q)
-    sys.stdout.write(dumps(
-        res_dict, sort_keys=True, indent=4, ensure_ascii=False,
-        default=json_serial))
diff --git a/utils/update-translations.sh b/utils/update-translations.sh
deleted file mode 100755
index 240387ae7..000000000
--- a/utils/update-translations.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/sh
-
-# script to easily update translation language files
-
-# add new language:
-# pybabel init -i messages.pot -d searx/translations -l en
-
-SEARX_DIR='searx'
-
-pybabel extract -F babel.cfg -o messages.pot "$SEARX_DIR"
-for f in `ls "$SEARX_DIR"'/translations/'`; do
-    pybabel update -N -i messages.pot -d "$SEARX_DIR"'/translations/' -l "$f"
-done
-
-echo '[!] update done, edit .po files if required and run pybabel compile -d searx/translations/'
-- 
cgit v1.2.3