summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Dockerfile2
-rw-r--r--docs/src/searx.engines.google.rst55
-rwxr-xr-xmanage2
-rw-r--r--searx/engines/google.py76
-rw-r--r--searx/engines/google_images.py13
-rw-r--r--searx/engines/google_news.py9
-rw-r--r--searx/engines/google_videos.py11
-rw-r--r--searx/settings.yml4
8 files changed, 122 insertions, 50 deletions
diff --git a/Dockerfile b/Dockerfile
index 7ad892bb2..13885611b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM alpine:3.13
+FROM alpine:3.14
ENTRYPOINT ["/sbin/tini","--","/usr/local/searx/dockerfiles/docker-entrypoint.sh"]
EXPOSE 8080
VOLUME /etc/searx
diff --git a/docs/src/searx.engines.google.rst b/docs/src/searx.engines.google.rst
new file mode 100644
index 000000000..2d10b5eea
--- /dev/null
+++ b/docs/src/searx.engines.google.rst
@@ -0,0 +1,55 @@
+.. _google engines:
+
+==============
+Google Engines
+==============
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+
+.. _google API:
+
+google API
+==========
+
+.. _Query Parameter Definitions:
+ https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
+
+For detailed description of the *REST-full* API see: `Query Parameter
+Definitions`_. Not all parameters can be appied and some engines are *special*
+(e.g. :ref:`google news engine`).
+
+.. _google web engine:
+
+Google WEB
+==========
+
+.. automodule:: searx.engines.google
+ :members:
+
+.. _google images engine:
+
+Google Images
+=============
+
+.. automodule:: searx.engines.google_images
+ :members:
+
+.. _google videos engine:
+
+Google Videos
+=============
+
+.. automodule:: searx.engines.google_videos
+ :members:
+
+.. _google news engine:
+
+Google News
+===========
+
+.. automodule:: searx.engines.google_news
+ :members:
diff --git a/manage b/manage
index c741dfb81..f9e6861b0 100755
--- a/manage
+++ b/manage
@@ -337,7 +337,7 @@ pygments.less() {
}
py.build() {
- build_msg BUILD "[pylint] python package ${PYDIST}"
+ build_msg BUILD "python package ${PYDIST}"
pyenv.cmd python setup.py \
sdist -d "${PYDIST}" \
bdist_wheel --bdist-dir "${PYBUILD}" -d "${PYDIST}"
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 841212e09..2ad9b4ca8 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -1,12 +1,28 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Google (Web)
+"""This is the implementation of the google WEB engine. Some of this
+implementations are shared by other engines:
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.
+- :ref:`google images engine`
+- :ref:`google news engine`
+- :ref:`google videos engine`
+
+The google WEB engine itself has a special setup option:
+
+.. code:: yaml
+
+ - name: google
+ ...
+ use_mobile_ui: true
+
+``use_mobile_ui``: (default: ``true``)
+ Enables to use *mobile endpoint* to bypass the google blocking (see
+ :issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More
+ results` is not affected by Google rate limiting and we can still do requests
+ while actively blocked by the original Google search. By activate
+ ``use_mobile_ui`` this behavior is simulated by adding the parameter
+ ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`.
-.. _Query Parameter Definitions:
- https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
"""
# pylint: disable=invalid-name, missing-function-docstring
@@ -34,6 +50,7 @@ categories = ['general']
paging = True
time_range_support = True
safesearch = True
+use_mobile_ui = False
supported_languages_url = 'https://www.google.com/preferences?#languages'
# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
@@ -136,8 +153,9 @@ spelling_suggestion_xpath = '//div[@class="med"]/p/a'
def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
"""Composing various language properties for the google engines.
- This function is called by the various google engines (google itself,
- google-images, -news, -scholar, -videos).
+ This function is called by the various google engines (:ref:`google web
+ engine`, :ref:`google images engine`, :ref:`google news engine` and
+ :ref:`google videos engine`).
:param dict param: request parameters of the engine
@@ -145,7 +163,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
:py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
:param dict lang_list: custom aliases for non standard language codes
- (used when calling :py:func:`searx.utils.match_language)
+ (used when calling :py:func:`searx.utils.match_language`)
:param bool supported_any_language: When a language is not specified, the
language interpretation is left up to Google to decide how the search
@@ -158,7 +176,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
Py-Dictionary with the key/value pairs:
language:
- Return value from :py:func:`searx.utils.match_language
+ Return value from :py:func:`searx.utils.match_language`
country:
The country code (e.g. US, AT, CA, FR, DE ..)
@@ -266,6 +284,12 @@ def request(query, params):
params, supported_languages, language_aliases, True
)
+ additional_parameters = {}
+ if use_mobile_ui:
+ additional_parameters = {
+ 'async': 'use_ac:true,_fmt:pc',
+ }
+
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
'q': query,
@@ -273,6 +297,7 @@ def request(query, params):
'ie': "utf8",
'oe': "utf8",
'start': offset,
+ **additional_parameters,
})
if params['time_range'] in time_range_dict:
@@ -282,9 +307,12 @@ def request(query, params):
params['url'] = query_url
params['headers'].update(lang_info['headers'])
- params['headers']['Accept'] = (
- 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
- )
+ if use_mobile_ui:
+ params['headers']['Accept'] = '*/*'
+ else:
+ params['headers']['Accept'] = (
+ 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+ )
return params
@@ -300,21 +328,23 @@ def response(resp):
dom = html.fromstring(resp.text)
# results --> answer
- answer = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]//text()')
- if answer:
- results.append({'answer': ' '.join(answer)})
+ answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
+ if answer_list:
+ answer_list = [_.xpath("normalize-space()") for _ in answer_list]
+ results.append({'answer': ' '.join(answer_list)})
else:
logger.debug("did not find 'answer'")
# results --> number_of_results
- try:
- _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0)
- _digit = ''.join([n for n in _txt if n.isdigit()])
- number_of_results = int(_digit)
- results.append({'number_of_results': number_of_results})
- except Exception as e: # pylint: disable=broad-except
- logger.debug("did not 'number_of_results'")
- logger.error(e, exc_info=True)
+ if not use_mobile_ui:
+ try:
+ _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0)
+ _digit = ''.join([n for n in _txt if n.isdigit()])
+ number_of_results = int(_digit)
+ results.append({'number_of_results': number_of_results})
+ except Exception as e: # pylint: disable=broad-except
+ logger.debug("did not 'number_of_results'")
+ logger.error(e, exc_info=True)
# parse results
for result in eval_xpath_list(dom, results_xpath):
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index e7382a6fe..c17227a64 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -1,19 +1,14 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Google (Images)
+"""This is the implementation of the google images engine.
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.
-
-.. _admonition:: Content-Security-Policy (CSP)
+.. admonition:: Content-Security-Policy (CSP)
This engine needs to allow images from the `data URLs`_ (prefixed with the
- ``data:` scheme).::
+ ``data:`` scheme)::
- Header set Content-Security-Policy "img-src 'self' data: ;"
+ Header set Content-Security-Policy "img-src 'self' data: ;"
-.. _Query Parameter Definitions:
- https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
.. _data URLs:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
"""
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index c1c97b700..e6d50855e 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -1,16 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Google (News)
-
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_. Not all parameters can be appied:
+"""This is the implementation of the google news engine. The google news API
+ignores some parameters from the common :ref:`google API`:
- num_ : the number of search results is ignored
- save_ : is ignored / Google-News results are always *SafeSearch*
-.. _Query Parameter Definitions:
- https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
-
.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
.. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index c57db4e63..3747e85a5 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -1,19 +1,14 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Google (Video)
+"""This is the implementation of the google videos engine.
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_. Not all parameters can be appied.
-
-.. _admonition:: Content-Security-Policy (CSP)
+.. admonition:: Content-Security-Policy (CSP)
This engine needs to allow images from the `data URLs`_ (prefixed with the
- ``data:` scheme).::
+ ``data:`` scheme)::
Header set Content-Security-Policy "img-src 'self' data: ;"
-.. _Query Parameter Definitions:
- https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
.. _data URLs:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
diff --git a/searx/settings.yml b/searx/settings.yml
index 1b19cc29e..3925081d6 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -583,6 +583,8 @@ engines:
- name: google
engine: google
shortcut: go
+ # see https://searxng.github.io/searxng/src/searx.engines.google.html#module-searx.engines.google
+ use_mobile_ui: true
# additional_tests:
# android: *test_android
@@ -1454,7 +1456,7 @@ engines:
timeout: 5.0
disabled: true
- - name: słownik języka polskiego
+ - name: slownik jezyka polskiego
engine: sjp
shortcut: sjp
base_url: https://sjp.pwn.pl/