diff options
| author | Markus Heiser <markus.heiser@darmarit.de> | 2022-02-17 22:10:34 +0100 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-02-18 22:44:43 +0100 |
| commit | 05c105b8371e3766dba35e815601881d83ef6383 (patch) | |
| tree | 06c79a31e6ddcbee2254cc7c627e13c4758413ba /searx/engines/bandcamp.py | |
| parent | bf2a2ed48faf511a609d3b084b02066d69549015 (diff) | |
[fix] bandcamp: fix itemtype (album|track) and exceptions
BTW: polish implementation and show tracklist for albums
Closes: https://github.com/searxng/searxng/issues/883
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/bandcamp.py')
| -rw-r--r-- | searx/engines/bandcamp.py | 48 |
1 file changed, 32 insertions, 16 deletions
diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py
index f868b44ed..f83ca6d4f 100644
--- a/searx/engines/bandcamp.py
+++ b/searx/engines/bandcamp.py
@@ -1,16 +1,23 @@
-"""
-Bandcamp (Music)
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Bandcamp (Music)
 
 @website https://bandcamp.com/
 @provide-api no
 @results HTML
 @parse url, title, content, publishedDate, iframe_src, thumbnail
+
 """
 
 from urllib.parse import urlencode, urlparse, parse_qs
 from dateutil.parser import parse as dateparse
 from lxml import html
-from searx.utils import extract_text
+
+from searx.utils import (
+    eval_xpath_getindex,
+    eval_xpath_list,
+    extract_text,
+)
 
 # about
 about = {
@@ -26,12 +33,13 @@ categories = ['music']
 paging = True
 
 base_url = "https://bandcamp.com/"
-search_string = search_string = 'search?{query}&page={page}'
-iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=ffffff/linkcol=0687f5/tracklist=false/artwork=small/transparent=true/"
+search_string = 'search?{query}&page={page}'
+iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small"
 
 
 def request(query, params):
     '''pre-request callback
+
     params<dict>:
         method : POST/GET
         headers : {}
@@ -42,37 +50,45 @@ def request(query, params):
     '''
 
     search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno'])
-
     params['url'] = base_url + search_path
-
     return params
 
 
 def response(resp):
     '''post-response callback
+
     resp: requests response object
     '''
     results = []
-    tree = html.fromstring(resp.text)
-    search_results = tree.xpath('//li[contains(@class, "searchresult")]')
-    for result in search_results:
-        link = result.xpath('.//div[@class="itemurl"]/a')[0]
-        result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
+    dom = html.fromstring(resp.text)
+
+    for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'):
+
+        link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None)
+        if link is None:
+            continue
+
         title = result.xpath('.//div[@class="heading"]/a/text()')
-        date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", ""))
         content = result.xpath('.//div[@class="subhead"]/text()')
         new_result = {
             "url": extract_text(link),
             "title": extract_text(title),
             "content": extract_text(content),
-            "publishedDate": date,
         }
+
+        date = eval_xpath_getindex(result, '//div[@class="released"]/text()', 0, default=None)
+        if date:
+            new_result["publishedDate"] = dateparse(date.replace("released ", ""))
+
         thumbnail = result.xpath('.//div[@class="art"]/img/@src')
         if thumbnail:
             new_result['thumbnail'] = thumbnail[0]
-        if "album" in result.classes:
+
+        result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
+        itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower()
+        if "album" == itemtype:
             new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id)
-        elif "track" in result.classes:
+        elif "track" == itemtype:
             new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id)
         results.append(new_result)