diff options
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/digg.py | 69 |
| -rw-r--r-- | searx/engines/google_images.py | 5 |
| -rw-r--r-- | searx/engines/qwant.py | 2 |
3 files changed, 5 insertions, 71 deletions
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
deleted file mode 100644
index e12cc43c8..000000000
--- a/searx/engines/digg.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-# lint: pylint
-"""
- Digg (News, Social media)
-"""
-
-from json import loads
-from urllib.parse import urlencode
-from datetime import datetime
-
-from lxml import html
-
-# about
-about = {
-    "website": 'https://digg.com',
-    "wikidata_id": 'Q270478',
-    "official_api_documentation": None,
-    "use_official_api": False,
-    "require_api_key": False,
-    "results": 'HTML',
-}
-
-# engine dependent config
-categories = ['news', 'social media']
-paging = True
-base_url = 'https://digg.com'
-
-# search-url
-search_url = base_url + (
-    '/api/search/'
-    '?{query}'
-    '&from={position}'
-    '&size=20'
-    '&format=html'
-)
-
-def request(query, params):
-    offset = (params['pageno'] - 1) * 20
-    params['url'] = search_url.format(
-        query = urlencode({'q': query}),
-        position = offset,
-    )
-    return params
-
-def response(resp):
-    results = []
-
-    # parse results
-    for result in loads(resp.text)['mapped']:
-
-        # strip html tags and superfluous quotation marks from content
-        content = html.document_fromstring(
-            result['excerpt']
-        ).text_content()
-
-        # 'created': {'ISO': '2020-10-16T14:09:55Z', ...}
-        published = datetime.strptime(
-            result['created']['ISO'], '%Y-%m-%dT%H:%M:%SZ'
-        )
-        results.append({
-            'url': result['url'],
-            'title': result['title'],
-            'content' : content,
-            'template': 'videos.html',
-            'publishedDate': published,
-            'thumbnail': result['images']['thumbImage'],
-        })
-
-    return results
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index ffdd6675e..61d291e3f 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -172,7 +172,10 @@ def response(resp):
             thumbnail_src = ''
 
         link_node = eval_xpath_getindex(img_node, '../../../a[2]', 0)
-        url = eval_xpath_getindex(link_node, '@href', 0)
+        url = eval_xpath_getindex(link_node, '@href', 0, None)
+        if url is None:
+            logger.error("missing @href in node: %s", html.tostring(link_node))
+            continue
 
         pub_nodes = eval_xpath(link_node, './div/div')
         pub_descr = img_alt
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 249a1f4e4..0312e518c 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -59,7 +59,7 @@ category_to_keyword = {
 }
 
 # search-url
-url = 'https://api.qwant.com/v3/search/{keyword}?q={query}&count={count}&offset={offset}'
+url = 'https://api.qwant.com/v3/search/{keyword}?{query}&count={count}&offset={offset}'
 
 def request(query, params):
     """Qwant search request"""