diff options
| -rw-r--r-- | Makefile | 3 | ||||
| -rw-r--r-- | searx/engines/apkmirror.py | 2 | ||||
| -rw-r--r-- | searx/engines/loc.py | 68 | ||||
| -rw-r--r-- | searx/engines/mediathekviewweb.py | 68 | ||||
| -rw-r--r-- | searx/search/processors/online.py | 2 | ||||
| -rw-r--r-- | searx/settings.yml | 28 |
6 files changed, 149 insertions, 22 deletions
@@ -193,7 +193,8 @@ PYLINT_FILES=\ searx/engines/google.py \ searx/engines/google_news.py \ searx/engines/google_videos.py \ - searx/engines/google_images.py + searx/engines/google_images.py \ + searx/engines/mediathekviewweb.py test.pylint: pyenvinstall $(call cmd,pylint,$(PYLINT_FILES)) diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index a4c66e891..a9ddd711a 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -45,7 +45,7 @@ def response(resp): dom = html.fromstring(resp.text) # parse results - for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]/div[@class="appRow"]'): + for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]//div[@class="appRow"]'): link = eval_xpath_getindex(result, './/h5/a', 0) url = base_url + link.attrib.get('href') + '#downloads' diff --git a/searx/engines/loc.py b/searx/engines/loc.py new file mode 100644 index 000000000..5c09ceff2 --- /dev/null +++ b/searx/engines/loc.py @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + + Library of Congress : images from Prints and Photographs Online Catalog + +""" + +from json import loads +from urllib.parse import urlencode + + +about = { + "website": 'https://www.loc.gov/pictures/', + "wikidata_id": 'Q131454', + "official_api_documentation": 'https://www.loc.gov/pictures/api', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + +categories = ['images'] + +paging = True + +base_url = 'https://loc.gov/pictures/search/?' +search_string = "&sp={page}&{query}&fo=json" + +IMG_SRC_FIXES = { + 'https://tile.loc.gov/storage-services/': 'https://tile.loc.gov/storage-services/', + 'https://loc.gov/pictures/static/images/': 'https://tile.loc.gov/storage-services/', + 'https://www.loc.gov/pictures/cdn/': 'https://tile.loc.gov/storage-services/', +} + + +def request(query, params): + + search_path = search_string.format( + query=urlencode({'q': query}), + page=params['pageno']) + + params['url'] = base_url + search_path + + return params + + +def response(resp): + results = [] + + json_data = loads(resp.text) + + for result in json_data['results']: + img_src = result['image']['full'] + for url_prefix, url_replace in IMG_SRC_FIXES.items(): + if img_src.startswith(url_prefix): + img_src = img_src.replace(url_prefix, url_replace) + break + else: + img_src = result['image']['thumb'] + results.append({ + 'url': result['links']['item'], + 'title': result['title'], + 'img_src': img_src, + 'thumbnail_src': result['image']['thumb'], + 'author': result['creator'], + 'template': 'images.html' + }) + + return results diff --git a/searx/engines/mediathekviewweb.py b/searx/engines/mediathekviewweb.py new file mode 100644 index 000000000..fa442c937 --- /dev/null +++ b/searx/engines/mediathekviewweb.py @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""MediathekViewWeb (API) + +""" + +# pylint: disable=missing-function-docstring + +import datetime +from json import loads, dumps + +about = { + "website": 'https://mediathekviewweb.de/', + "wikidata_id": 'Q27877380', + "official_api_documentation": 'https://gist.github.com/bagbag/a2888478d27de0e989cf777f81fb33de', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + +categories = ['videos'] +paging = True +time_range_support = False +safesearch = False + +def request(query, params): + + params['url'] = 'https://mediathekviewweb.de/api/query' + params['method'] = 'POST' + params['headers']['Content-type'] = 'text/plain' + params['data'] = dumps({ + 'queries' : [ + { + 'fields' : [ + 'title', + 'topic', + ], + 'query' : query + }, + ], + 'sortBy' : 'timestamp', + 'sortOrder' : 'desc', + 'future' : True, + 'offset' : (params['pageno'] - 1 )* 10, + 'size' : 10 + }) + return params + +def response(resp): + + resp = loads(resp.text) + + mwv_result = resp['result'] + mwv_result_list = mwv_result['results'] + + results = [] + + for item in mwv_result_list: + + item['hms'] = str(datetime.timedelta(seconds=item['duration'])) + + results.append({ + 'url' : item['url_video_hd'], + 'title' : "%(channel)s: %(title)s (%(hms)s)" % item, + 'length' : item['hms'], + 'content' : "%(description)s" % item, + }) + + return results diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index d79edd542..0cc175e1b 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -77,7 +77,7 @@ class OnlineProcessor(EngineProcessor): soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0) # raise_for_status - request_args['raise_for_httperror'] = params.get('raise_for_httperror', False) + request_args['raise_for_httperror'] = params.get('raise_for_httperror', True) # specific type of request (GET or POST) if params['method'] == 'GET': diff --git a/searx/settings.yml b/searx/settings.yml index af6e3a47e..87008eb20 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -583,25 +583,6 @@ engines: require_api_key: false results: HTML - - name : google play music - engine : xpath - search_url : https://play.google.com/store/search?q={query}&c=music - results_xpath : '//div[@class="WHE7ib mpg5gc"]' - title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a' - url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href' - content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]' - thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]//img/@data-src' - categories : music - shortcut : gps - disabled : True - about: - website: https://play.google.com/ - wikidata_id: Q79576 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - name : geektimes engine : xpath paging : True @@ -698,6 +679,11 @@ engines: require_api_key: false results: HTML + - name : library of congress + engine : loc + shortcut : loc + categories : images + - name : lobste.rs engine : xpath search_url : https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance @@ -1272,6 +1258,10 @@ engines: categories: videos disabled : True + - name : mediathekviewweb + engine : mediathekviewweb + shortcut : mvw + # - name : yacy # engine : yacy # shortcut : ya |