diff options
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/gigablast.py | 35 | ||||
| -rw-r--r-- | searx/engines/imdb.py | 2 | ||||
| -rw-r--r-- | searx/engines/yggtorrent.py | 127 |
3 files changed, 22 insertions, 142 deletions
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index c2c51afe8..0f685abc5 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -6,6 +6,7 @@ # pylint: disable=invalid-name import re +from time import time from json import loads from urllib.parse import urlencode from searx.network import get @@ -28,13 +29,18 @@ safesearch = True # search-url base_url = 'https://gigablast.com' +search_path = '/search?' # ugly hack: gigablast requires a random extra parameter which can be extracted # from the source code of the gigablast HTTP client extra_param = '' -extra_param_path='/search?c=main&qlangcountry=en-us&q=south&s=10' +# timestamp of the last fetch of extra_param +extra_param_ts = 0 +# after how many seconds extra_param expire +extra_param_expiration_delay = 3000 -def parse_extra_param(text): + +def fetch_extra_param(query_args, headers): # example: # @@ -43,7 +49,12 @@ def parse_extra_param(text): # # extra_param --> "rand=1590740241635&nsab=730863287" - global extra_param # pylint: disable=global-statement + global extra_param, extra_param_ts # pylint: disable=global-statement + + extra_param_ts = time() + extra_param_path = search_path + urlencode(query_args) + text = get(base_url + extra_param_path, headers=headers).text + re_var= None for line in text.splitlines(): if re_var is None and extra_param_path in line: @@ -54,21 +65,12 @@ def parse_extra_param(text): if re_var is not None and re_var.search(line): extra_param += re_var.search(line).group(1) break - # logger.debug('gigablast extra_param="%s"', extra_param) - -def init(engine_settings=None): # pylint: disable=unused-argument - parse_extra_param(get(base_url + extra_param_path).text) # do search-request def request(query, params): # pylint: disable=unused-argument - - # see API http://www.gigablast.com/api.html#/search - # Take into account, that the API has some quirks .. - query_args = dict( c = 'main' - , format = 'json' , q = query , dr = 1 , showgoodimages = 0 @@ -81,8 +83,13 @@ def request(query, params): # pylint: disable=unused-argument if params['safesearch'] >= 1: query_args['ff'] = 1 - search_url = '/search?' + urlencode(query_args) - params['url'] = base_url + search_url + extra_param + # see API http://www.gigablast.com/api.html#/search + # Take into account, that the API has some quirks .. + if time() > (extra_param_ts + extra_param_expiration_delay): + fetch_extra_param(query_args, params['headers']) + + query_args['format'] = 'json' + params['url'] = base_url + search_path + urlencode(query_args) + extra_param return params diff --git a/searx/engines/imdb.py b/searx/engines/imdb.py index a3dab736d..a7474fd5b 100644 --- a/searx/engines/imdb.py +++ b/searx/engines/imdb.py @@ -57,7 +57,7 @@ def response(resp): suggestions = json.loads(resp.text) results = [] - for entry in suggestions['d']: + for entry in suggestions.get('d', []): # https://developer.imdb.com/documentation/key-concepts#imdb-ids entry_id = entry['id'] diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py deleted file mode 100644 index 31a0408b8..000000000 --- a/searx/engines/yggtorrent.py +++ /dev/null @@ -1,127 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -""" - Yggtorrent (Videos, Music, Files) -""" - -from lxml import html -from operator import itemgetter -from datetime import datetime -from urllib.parse import quote -from searx.utils import extract_text, get_torrent_size -from searx.network import get as http_get - -# about -about = { - "website": 'https://www4.yggtorrent.li/', - "wikidata_id": None, - "official_api_documentation": None, - "use_official_api": False, - "require_api_key": False, - "results": 'HTML', -} - -# engine dependent config -categories = ['files'] -paging = True - -# search-url -url = 'https://www4.yggtorrent.li/' -search_url = url + 'engine/search?name={search_term}&do=search&page={pageno}&category={search_type}' - -# yggtorrent specific type-definitions -search_types = {'files': 'all', - 'music': '2139', - 'videos': '2145'} - -cookies = dict() - - -def init(engine_settings=None): - # initial cookies - resp = http_get(url, allow_redirects=False) - if resp.ok: - for r in resp.history: - cookies.update(r.cookies) - cookies.update(resp.cookies) - - -# do search-request -def request(query, params): - search_type = search_types.get(params['category'], 'all') - pageno = (params['pageno'] - 1) * 50 - - params['url'] = search_url.format(search_term=quote(query), - search_type=search_type, - pageno=pageno) - - params['cookies'] = cookies - - return params - - -# get response from search-request -def response(resp): - results = [] - dom = html.fromstring(resp.text) - - search_res = dom.xpath('//section[@id="#torrents"]/div/table/tbody/tr') - - # return empty array if nothing is found - if not search_res: - return [] - - # parse results - for result in search_res: - link = result.xpath('.//a[@id="torrent_name"]')[0] - href = link.attrib.get('href') - title = extract_text(link) - seed = result.xpath('.//td[8]/text()')[0] - leech = result.xpath('.//td[9]/text()')[0] - - # convert seed to int if possible - if seed.isdigit(): - seed = int(seed) - else: - seed = 0 - - # convert leech to int if possible - if leech.isdigit(): - leech = int(leech) - else: - leech = 0 - - params = {'url': href, - 'title': title, - 'seed': seed, - 'leech': leech, - 'template': 'torrent.html'} - - # let's try to calculate the torrent size - try: - filesize_info = result.xpath('.//td[6]/text()')[0] - filesize = filesize_info[:-2] - filesize_multiplier = filesize_info[-2:].lower() - multiplier_french_to_english = { - 'to': 'TiB', - 'go': 'GiB', - 'mo': 'MiB', - 'ko': 'KiB' - } - filesize = get_torrent_size(filesize, multiplier_french_to_english[filesize_multiplier]) - params['filesize'] = filesize - except: - pass - - # extract and convert creation date - try: - date_ts = result.xpath('.//td[5]/div/text()')[0] - date = datetime.fromtimestamp(float(date_ts)) - params['publishedDate'] = date - except: - pass - - # append result - results.append(params) - - # return results sorted by seeder - return sorted(results, key=itemgetter('seed'), reverse=True) |