diff options
Diffstat (limited to 'searx/engines')
 -rw-r--r--  searx/engines/google.py        | 11
 -rw-r--r--  searx/engines/invidious.py     | 30
 -rw-r--r--  searx/engines/openstreetmap.py |  4
 -rw-r--r--  searx/engines/wikidata.py      | 71
4 files changed, 76 insertions, 40 deletions
diff --git a/searx/engines/google.py b/searx/engines/google.py index 13d270113..e5f24b166 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -111,9 +111,8 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'} # specific xpath variables # ------------------------ -# google results are grouped into <div class="g ..." ../> -results_xpath = '//div[@id="search"]//div[contains(@class, "g ")]' -results_xpath_mobile_ui = '//div[contains(@class, "g ")]' +# google results are grouped into <div class="jtfYYd ..." ../> +results_xpath = '//div[@class="jtfYYd"]' # google *sections* are no usual *results*, we ignore them g_section_with_header = './g-section-with-header' @@ -338,11 +337,7 @@ def response(resp): # parse results - _results_xpath = results_xpath - if use_mobile_ui: - _results_xpath = results_xpath_mobile_ui - - for result in eval_xpath_list(dom, _results_xpath): + for result in eval_xpath_list(dom, results_xpath): # google *sections* if extract_text(eval_xpath(result, g_section_with_header)): diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py index 914615d6f..badef57fd 100644 --- a/searx/engines/invidious.py +++ b/searx/engines/invidious.py @@ -1,12 +1,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Invidious (Videos) +# lint: pylint +"""Invidious (Videos) """ -from urllib.parse import quote_plus -from dateutil import parser import time import random +from urllib.parse import quote_plus +from dateutil import parser # about about = { @@ -23,16 +23,11 @@ categories = ["videos", "music"] paging = True time_range_support = True - -# search-url - -base_url = '' -base_url_rand = '' +# base_url can be overwritten by a list of URLs in the settings.yml +base_url = 'https://vid.puffyan.us' -# do search-request def request(query, params): - global base_url_rand time_range_dict = { "day": "today", "week": "week", @@ -41,11 +36,11 @@ def request(query, params): } if isinstance(base_url, list): - base_url_rand = 
random.choice(base_url) + params["base_url"] = random.choice(base_url) else: - base_url_rand = base_url + params["base_url"] = base_url - search_url = base_url_rand + "api/v1/search?q={query}" + search_url = params["base_url"] + "/api/v1/search?q={query}" params["url"] = search_url.format(query=quote_plus(query)) + "&page={pageno}".format(pageno=params["pageno"]) if params["time_range"] in time_range_dict: @@ -59,7 +54,6 @@ def request(query, params): return params -# get response from search-request def response(resp): results = [] @@ -67,12 +61,12 @@ def response(resp): embedded_url = ( '<iframe width="540" height="304" ' + 'data-src="' - + base_url_rand - + 'embed/{videoid}" ' + + resp.search_params['base_url'] + + '/embed/{videoid}" ' + 'frameborder="0" allowfullscreen></iframe>' ) - base_invidious_url = base_url_rand + "watch?v=" + base_invidious_url = resp.search_params['base_url'] + "/watch?v=" for result in search_results: rtype = result.get("type", None) diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 946869834..c619ce98e 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -14,7 +14,7 @@ from flask_babel import gettext from searx.data import OSM_KEYS_TAGS, CURRENCIES from searx.utils import searx_useragent from searx.external_urls import get_external_url -from searx.engines.wikidata import send_wikidata_query, sparql_string_escape +from searx.engines.wikidata import send_wikidata_query, sparql_string_escape, get_thumbnail # about about = { @@ -168,7 +168,7 @@ def response(resp): continue url, osm, geojson = get_url_osm_geojson(result) - img_src = get_img_src(result) + img_src = get_thumbnail(get_img_src(result)) links, link_keys = get_links(result, user_language) data = get_data(result, user_language, link_keys) diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index e5d3f55c0..592a51ec8 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -1,10 +1,11 
@@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Wikidata """ - Wikidata -""" - +# pylint: disable=missing-class-docstring -from urllib.parse import urlencode +from hashlib import md5 +from urllib.parse import urlencode, unquote from json import loads from dateutil.parser import isoparse @@ -185,7 +186,51 @@ def response(resp): return results +_IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/" +_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/thumb/" + + +def get_thumbnail(img_src): + """Get Thumbnail image from wikimedia commons + + Images from commons.wikimedia.org are (HTTP) redirected to + upload.wikimedia.org. The redirected URL can be calculated by this + function. + + - https://stackoverflow.com/a/33691240 + + """ + logger.debug('get_thumbnail(): %s', img_src) + if not img_src is None and _IMG_SRC_DEFAULT_URL_PREFIX in img_src.split()[0]: + img_src_name = unquote(img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[0].replace("%20", "_")) + img_src_name_first = img_src_name + img_src_name_second = img_src_name + + if ".svg" in img_src_name.split()[0]: + img_src_name_second = img_src_name + ".png" + + img_src_size = img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[1] + img_src_size = img_src_size[img_src_size.index("=") + 1 : img_src_size.index("&")] + img_src_name_md5 = md5(img_src_name.encode("utf-8")).hexdigest() + img_src = ( + _IMG_SRC_NEW_URL_PREFIX + + img_src_name_md5[0] + + "/" + + img_src_name_md5[0:2] + + "/" + + img_src_name_first + + "/" + + img_src_size + + "px-" + + img_src_name_second + ) + logger.debug('get_thumbnail() redirected: %s', img_src) + + return img_src + + def get_results(attribute_result, attributes, language): + # pylint: disable=too-many-branches results = [] infobox_title = attribute_result.get('itemLabel') infobox_id = attribute_result['item'] @@ -194,7 +239,7 @@ def get_results(attribute_result, attributes, language): 
infobox_attributes = [] infobox_content = attribute_result.get('itemDescription', []) img_src = None - img_src_priority = 100 + img_src_priority = 0 for attribute in attributes: value = attribute.get_str(attribute_result, language) @@ -220,8 +265,8 @@ def get_results(attribute_result, attributes, language): # this attribute is an image. # replace the current image only the priority is lower # (the infobox contain only one image). - if attribute.priority < img_src_priority: - img_src = value + if attribute.priority > img_src_priority: + img_src = get_thumbnail(value) img_src_priority = attribute.priority elif attribute_type == WDGeoAttribute: # geocoordinate link @@ -278,6 +323,7 @@ def get_query(query, language): def get_attributes(language): + # pylint: disable=too-many-statements attributes = [] def add_value(name): @@ -418,7 +464,7 @@ def get_attributes(language): class WDAttribute: - + # pylint: disable=no-self-use __slots__ = ('name',) def __init__(self, name): @@ -439,7 +485,7 @@ class WDAttribute: def get_group_by(self): return "" - def get_str(self, result, language): + def get_str(self, result, language): # pylint: disable=unused-argument return result.get(self.name + 's') def __repr__(self): @@ -580,6 +626,7 @@ class WDImageAttribute(WDURLAttribute): class WDDateAttribute(WDAttribute): + # pylint: disable=no-self-use def get_select(self): return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name) @@ -600,7 +647,7 @@ class WDDateAttribute(WDAttribute): def get_group_by(self): return self.get_select() - def format_8(self, value, locale): + def format_8(self, value, locale): # pylint: disable=unused-argument # precision: less than a year return value @@ -673,7 +720,7 @@ class WDDateAttribute(WDAttribute): else: value = t[0] return format_method(value, language) - except Exception: + except Exception: # pylint: disable=broad-except return value return value @@ -687,7 +734,7 @@ def debug_explain_wikidata_query(query, 
method='GET'): return http_response.content -def init(engine_settings=None): +def init(engine_settings=None): # pylint: disable=unused-argument # WIKIDATA_PROPERTIES : add unit symbols WIKIDATA_PROPERTIES.update(WIKIDATA_UNITS) |