From e5677ae6b6b23c943e64d8e2abcb64c13c0e8bbf Mon Sep 17 00:00:00 2001 From: Kirill Isakov Date: Fri, 25 Mar 2016 00:24:37 +0600 Subject: Add Nyaa.se search engine --- searx/engines/nyaa.py | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 searx/engines/nyaa.py (limited to 'searx/engines/nyaa.py') diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py new file mode 100644 index 000000000..81caed76f --- /dev/null +++ b/searx/engines/nyaa.py @@ -0,0 +1,115 @@ +""" + Nyaa.se (Anime Bittorrent tracker) + + @website http://www.nyaa.se/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content, seed, leech, torrentfile +""" + +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['files', 'images', 'videos', 'music'] +paging = True + +# search-url +base_url = 'http://www.nyaa.se/' +search_url = base_url + '?page=search&{query}&offset={offset}' + +# xpath queries +xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' +xpath_category = './/td[@class="tlisticon"]/a' +xpath_title = './/td[@class="tlistname"]/a' +xpath_torrent_file = './/td[@class="tlistdownload"]/a' +xpath_filesize = './/td[@class="tlistsize"]/text()' +xpath_seeds = './/td[@class="tlistsn"]/text()' +xpath_leeches = './/td[@class="tlistln"]/text()' +xpath_downloads = './/td[@class="tlistdn"]/text()' + + +# convert a variable to integer or return 0 if it's not a number +def int_or_zero(num): + if isinstance(num, list): + if len(num) < 1: + return 0 + num = num[0] + if num.isdigit(): + return int(num) + return 0 + + +# do search-request +def request(query, params): + query = urlencode({'term': query}) + params['url'] = search_url.format(query=query, offset=params['pageno']) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath(xpath_results): + # category in which our torrent belongs + category = result.xpath(xpath_category)[0].attrib.get('title') + + # torrent title + page_a = result.xpath(xpath_title)[0] + title = escape(extract_text(page_a)) + + # link to the page + href = page_a.attrib.get('href') + + # link to the torrent file + torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href') + + # torrent size + try: + file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') + + # convert torrent size to bytes. + # if there is no correct index in this dictionary, + # the try block fails as it should + multiplier = { + 'KIB': 1024, + 'MIB': 1024 ** 2, + 'GIB': 1024 ** 3, + 'TIB': 1024 ** 4 + }[suffix.upper()] + + file_size = int(float(file_size) * multiplier) + except Exception as e: + file_size = None + + # seed count + seed = int_or_zero(result.xpath(xpath_seeds)) + + # leech count + leech = int_or_zero(result.xpath(xpath_leeches)) + + # torrent downloads count + downloads = int_or_zero(result.xpath(xpath_downloads)) + + # content string contains all information not included into template + content = 'Category: "{category}". Downloaded {downloads} times.' + content = content.format(category=category, downloads=downloads) + content = escape(content) + + results.append({'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': file_size, + 'torrentfile': torrent_link, + 'template': 'torrent.html'}) + + return results -- cgit v1.2.3 From 7fbc12ee4e6aea8a8ad0098deb03054976056371 Mon Sep 17 00:00:00 2001 From: Kirill Isakov Date: Sat, 26 Mar 2016 05:28:58 +0600 Subject: Add Torrentz.eu search engine --- searx/engines/nyaa.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'searx/engines/nyaa.py') diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index 81caed76f..b0f884b77 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -43,6 +43,19 @@ def int_or_zero(num): return int(num) return 0 +# get multiplier to convert torrent size to bytes +def get_filesize_mul(suffix): + return { + 'KB': 1024, + 'MB': 1024 ** 2, + 'GB': 1024 ** 3, + 'TB': 1024 ** 4, + + 'KIB': 1024, + 'MIB': 1024 ** 2, + 'GIB': 1024 ** 3, + 'TIB': 1024 ** 4 + }[str(suffix).upper()] # do search-request def request(query, params): @@ -74,18 +87,7 @@ def response(resp): # torrent size try: file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') - - # convert torrent size to bytes. - # if there is no correct index in this dictionary, - # the try block fails as it should - multiplier = { - 'KIB': 1024, - 'MIB': 1024 ** 2, - 'GIB': 1024 ** 3, - 'TIB': 1024 ** 4 - }[suffix.upper()] - - file_size = int(float(file_size) * multiplier) + file_size = int(float(file_size) * get_filesize_mul(suffix)) except Exception as e: file_size = None -- cgit v1.2.3 From e202c6fbec7c9dff67acdfe387699010d61da32b Mon Sep 17 00:00:00 2001 From: Kirill Isakov Date: Sun, 27 Mar 2016 05:23:17 +0600 Subject: Fix PEP8 warnings for Nyaa.py --- searx/engines/nyaa.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'searx/engines/nyaa.py') diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index b0f884b77..cda8231f7 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -43,6 +43,7 @@ def int_or_zero(num): return int(num) return 0 + # get multiplier to convert torrent size to bytes def get_filesize_mul(suffix): return { @@ -57,6 +58,7 @@ def get_filesize_mul(suffix): 'TIB': 1024 ** 4 }[str(suffix).upper()] + # do search-request def request(query, params): query = urlencode({'term': query}) -- cgit v1.2.3