summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines')
-rw-r--r--  searx/engines/google.py        | 11
-rw-r--r--  searx/engines/invidious.py     | 30
-rw-r--r--  searx/engines/openstreetmap.py |  4
-rw-r--r--  searx/engines/wikidata.py      | 71
4 files changed, 76 insertions(+), 40 deletions(-)
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 13d270113..e5f24b166 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -111,9 +111,8 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
# specific xpath variables
# ------------------------
-# google results are grouped into <div class="g ..." ../>
-results_xpath = '//div[@id="search"]//div[contains(@class, "g ")]'
-results_xpath_mobile_ui = '//div[contains(@class, "g ")]'
+# google results are grouped into <div class="jtfYYd ..." ../>
+results_xpath = '//div[@class="jtfYYd"]'
# google *sections* are no usual *results*, we ignore them
g_section_with_header = './g-section-with-header'
@@ -338,11 +337,7 @@ def response(resp):
# parse results
- _results_xpath = results_xpath
- if use_mobile_ui:
- _results_xpath = results_xpath_mobile_ui
-
- for result in eval_xpath_list(dom, _results_xpath):
+ for result in eval_xpath_list(dom, results_xpath):
# google *sections*
if extract_text(eval_xpath(result, g_section_with_header)):
diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py
index 914615d6f..badef57fd 100644
--- a/searx/engines/invidious.py
+++ b/searx/engines/invidious.py
@@ -1,12 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Invidious (Videos)
+# lint: pylint
+"""Invidious (Videos)
"""
-from urllib.parse import quote_plus
-from dateutil import parser
import time
import random
+from urllib.parse import quote_plus
+from dateutil import parser
# about
about = {
@@ -23,16 +23,11 @@ categories = ["videos", "music"]
paging = True
time_range_support = True
-
-# search-url
-
-base_url = ''
-base_url_rand = ''
+# base_url can be overwritten by a list of URLs in the settings.yml
+base_url = 'https://vid.puffyan.us'
-# do search-request
def request(query, params):
- global base_url_rand
time_range_dict = {
"day": "today",
"week": "week",
@@ -41,11 +36,11 @@ def request(query, params):
}
if isinstance(base_url, list):
- base_url_rand = random.choice(base_url)
+ params["base_url"] = random.choice(base_url)
else:
- base_url_rand = base_url
+ params["base_url"] = base_url
- search_url = base_url_rand + "api/v1/search?q={query}"
+ search_url = params["base_url"] + "/api/v1/search?q={query}"
params["url"] = search_url.format(query=quote_plus(query)) + "&page={pageno}".format(pageno=params["pageno"])
if params["time_range"] in time_range_dict:
@@ -59,7 +54,6 @@ def request(query, params):
return params
-# get response from search-request
def response(resp):
results = []
@@ -67,12 +61,12 @@ def response(resp):
embedded_url = (
'<iframe width="540" height="304" '
+ 'data-src="'
- + base_url_rand
- + 'embed/{videoid}" '
+ + resp.search_params['base_url']
+ + '/embed/{videoid}" '
+ 'frameborder="0" allowfullscreen></iframe>'
)
- base_invidious_url = base_url_rand + "watch?v="
+ base_invidious_url = resp.search_params['base_url'] + "/watch?v="
for result in search_results:
rtype = result.get("type", None)
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 946869834..c619ce98e 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -14,7 +14,7 @@ from flask_babel import gettext
from searx.data import OSM_KEYS_TAGS, CURRENCIES
from searx.utils import searx_useragent
from searx.external_urls import get_external_url
-from searx.engines.wikidata import send_wikidata_query, sparql_string_escape
+from searx.engines.wikidata import send_wikidata_query, sparql_string_escape, get_thumbnail
# about
about = {
@@ -168,7 +168,7 @@ def response(resp):
continue
url, osm, geojson = get_url_osm_geojson(result)
- img_src = get_img_src(result)
+ img_src = get_thumbnail(get_img_src(result))
links, link_keys = get_links(result, user_language)
data = get_data(result, user_language, link_keys)
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index e5d3f55c0..592a51ec8 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Wikidata
"""
- Wikidata
-"""
-
+# pylint: disable=missing-class-docstring
-from urllib.parse import urlencode
+from hashlib import md5
+from urllib.parse import urlencode, unquote
from json import loads
from dateutil.parser import isoparse
@@ -185,7 +186,51 @@ def response(resp):
return results
+_IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/"
+_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/thumb/"
+
+
+def get_thumbnail(img_src):
+ """Get Thumbnail image from wikimedia commons
+
+ Images from commons.wikimedia.org are (HTTP) redirected to
+ upload.wikimedia.org. The redirected URL can be calculated by this
+ function.
+
+ - https://stackoverflow.com/a/33691240
+
+ """
+ logger.debug('get_thumbnail(): %s', img_src)
+ if not img_src is None and _IMG_SRC_DEFAULT_URL_PREFIX in img_src.split()[0]:
+ img_src_name = unquote(img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[0].replace("%20", "_"))
+ img_src_name_first = img_src_name
+ img_src_name_second = img_src_name
+
+ if ".svg" in img_src_name.split()[0]:
+ img_src_name_second = img_src_name + ".png"
+
+ img_src_size = img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[1]
+ img_src_size = img_src_size[img_src_size.index("=") + 1 : img_src_size.index("&")]
+ img_src_name_md5 = md5(img_src_name.encode("utf-8")).hexdigest()
+ img_src = (
+ _IMG_SRC_NEW_URL_PREFIX
+ + img_src_name_md5[0]
+ + "/"
+ + img_src_name_md5[0:2]
+ + "/"
+ + img_src_name_first
+ + "/"
+ + img_src_size
+ + "px-"
+ + img_src_name_second
+ )
+ logger.debug('get_thumbnail() redirected: %s', img_src)
+
+ return img_src
+
+
def get_results(attribute_result, attributes, language):
+ # pylint: disable=too-many-branches
results = []
infobox_title = attribute_result.get('itemLabel')
infobox_id = attribute_result['item']
@@ -194,7 +239,7 @@ def get_results(attribute_result, attributes, language):
infobox_attributes = []
infobox_content = attribute_result.get('itemDescription', [])
img_src = None
- img_src_priority = 100
+ img_src_priority = 0
for attribute in attributes:
value = attribute.get_str(attribute_result, language)
@@ -220,8 +265,8 @@ def get_results(attribute_result, attributes, language):
# this attribute is an image.
# replace the current image only the priority is lower
# (the infobox contain only one image).
- if attribute.priority < img_src_priority:
- img_src = value
+ if attribute.priority > img_src_priority:
+ img_src = get_thumbnail(value)
img_src_priority = attribute.priority
elif attribute_type == WDGeoAttribute:
# geocoordinate link
@@ -278,6 +323,7 @@ def get_query(query, language):
def get_attributes(language):
+ # pylint: disable=too-many-statements
attributes = []
def add_value(name):
@@ -418,7 +464,7 @@ def get_attributes(language):
class WDAttribute:
-
+ # pylint: disable=no-self-use
__slots__ = ('name',)
def __init__(self, name):
@@ -439,7 +485,7 @@ class WDAttribute:
def get_group_by(self):
return ""
- def get_str(self, result, language):
+ def get_str(self, result, language): # pylint: disable=unused-argument
return result.get(self.name + 's')
def __repr__(self):
@@ -580,6 +626,7 @@ class WDImageAttribute(WDURLAttribute):
class WDDateAttribute(WDAttribute):
+ # pylint: disable=no-self-use
def get_select(self):
return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)
@@ -600,7 +647,7 @@ class WDDateAttribute(WDAttribute):
def get_group_by(self):
return self.get_select()
- def format_8(self, value, locale):
+ def format_8(self, value, locale): # pylint: disable=unused-argument
# precision: less than a year
return value
@@ -673,7 +720,7 @@ class WDDateAttribute(WDAttribute):
else:
value = t[0]
return format_method(value, language)
- except Exception:
+ except Exception: # pylint: disable=broad-except
return value
return value
@@ -687,7 +734,7 @@ def debug_explain_wikidata_query(query, method='GET'):
return http_response.content
-def init(engine_settings=None):
+def init(engine_settings=None): # pylint: disable=unused-argument
# WIKIDATA_PROPERTIES : add unit symbols
WIKIDATA_PROPERTIES.update(WIKIDATA_UNITS)