| field | value | date |
|---|---|---|
| author | Alexandre Flament <alex@al-f.net> | 2016-10-22 14:25:50 +0200 |
| committer | GitHub <noreply@github.com> | 2016-10-22 14:25:50 +0200 |
| commit | a88768efd8ee6b832febda8508cb1ba3c8778b94 | |
| tree | a42f1078ce421a69edb7088c642461b6c05f7022 /searx/engines | |
| parent | f90eb428c679d3852d9738f6279d045283340562 | |
| parent | 85c7237a4f26cea523d5c3b8a863058e459ca07a | |
Merge branch 'master' into http1.1
Diffstat (limited to 'searx/engines')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | searx/engines/__init__.py | 12 |
| -rw-r--r-- | searx/engines/digbt.py | 2 |
| -rw-r--r-- | searx/engines/kickass.py | 46 |
| -rw-r--r-- | searx/engines/pdbe.py | 109 |
| -rw-r--r-- | searx/engines/seedpeer.py | 78 |
5 files changed, 208 insertions, 39 deletions
```diff
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 782b622b0..14376c31f 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -57,11 +57,17 @@ def load_module(filename):
 
 
 def load_engine(engine_data):
-    engine_name = engine_data['engine']
+
+    if '_' in engine_data['name']:
+        logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
+        sys.exit(1)
+
+    engine_module = engine_data['engine']
+
     try:
-        engine = load_module(engine_name + '.py')
+        engine = load_module(engine_module + '.py')
     except:
-        logger.exception('Cannot load engine "{}"'.format(engine_name))
+        logger.exception('Cannot load engine "{}"'.format(engine_module))
         return None
 
     for param_name in engine_data:
```

```diff
diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py
index c35327e8c..b55d7747a 100644
--- a/searx/engines/digbt.py
+++ b/searx/engines/digbt.py
@@ -40,7 +40,7 @@ def response(resp):
     results = list()
     for result in search_res:
         url = urljoin(URL, result.xpath('.//a[@title]/@href')[0])
-        title = result.xpath('.//a[@title]/text()')[0]
+        title = extract_text(result.xpath('.//a[@title]'))
         content = extract_text(result.xpath('.//div[@class="files"]'))
         files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
         filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
```

```diff
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index 4c5d24008..9cd8284da 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -16,13 +16,14 @@ from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size, convert_str_to_int
 
 # engine dependent config
 categories = ['videos', 'music', 'files']
 paging = True
 
 # search-url
-url = 'https://kickass.to/'
+url = 'https://kickass.cd/'
 search_url = url + 'search/{search_term}/{pageno}/'
 
 # specific xpath variables
@@ -57,41 +58,16 @@ def response(resp):
         href = urljoin(url, link.attrib['href'])
         title = extract_text(link)
         content = escape(extract_text(result.xpath(content_xpath)))
-        seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
-        leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
-        filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]
-        filesize_multiplier = result.xpath('.//td[contains(@class, "nobr")]//span/text()')[0]
-        files = result.xpath('.//td[contains(@class, "center")][2]/text()')[0]
-
-        # convert seed to int if possible
-        if seed.isdigit():
-            seed = int(seed)
-        else:
-            seed = 0
+        seed = extract_text(result.xpath('.//td[contains(@class, "green")]'))
+        leech = extract_text(result.xpath('.//td[contains(@class, "red")]'))
+        filesize_info = extract_text(result.xpath('.//td[contains(@class, "nobr")]'))
+        files = extract_text(result.xpath('.//td[contains(@class, "center")][2]'))
 
-        # convert leech to int if possible
-        if leech.isdigit():
-            leech = int(leech)
-        else:
-            leech = 0
-
-        # convert filesize to byte if possible
-        try:
-            filesize = float(filesize)
-
-            # convert filesize to byte
-            if filesize_multiplier == 'TB':
-                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
-            elif filesize_multiplier == 'GB':
-                filesize = int(filesize * 1024 * 1024 * 1024)
-            elif filesize_multiplier == 'MB':
-                filesize = int(filesize * 1024 * 1024)
-            elif filesize_multiplier == 'KB':
-                filesize = int(filesize * 1024)
-        except:
-            filesize = None
-
-        # convert files to int if possible
+        seed = convert_str_to_int(seed)
+        leech = convert_str_to_int(leech)
+
+        filesize, filesize_multiplier = filesize_info.split()
+        filesize = get_torrent_size(filesize, filesize_multiplier)
+
         if files.isdigit():
             files = int(files)
         else:
```
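The kickass change moves the seed/leech/filesize parsing into shared helpers imported from `searx.utils`. Their implementations are not part of this diff; the sketch below only approximates the behaviour the new code relies on, inferred from the inline logic the diff deletes, and is not a verbatim copy of `searx.utils`:

```python
# Approximate behaviour of the searx.utils helpers imported above,
# inferred from the inline logic this diff removes.

def convert_str_to_int(number_str):
    # best-effort conversion; non-numeric text (e.g. '--') becomes 0
    if number_str.isdigit():
        return int(number_str)
    return 0


def get_torrent_size(filesize, filesize_multiplier):
    # turn a ('1.5', 'GB')-style pair into a byte count, or None on failure
    try:
        filesize = float(filesize)
        if filesize_multiplier == 'TB':
            filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
        elif filesize_multiplier == 'GB':
            filesize = int(filesize * 1024 * 1024 * 1024)
        elif filesize_multiplier == 'MB':
            filesize = int(filesize * 1024 * 1024)
        elif filesize_multiplier == 'KB':
            filesize = int(filesize * 1024)
    except ValueError:
        filesize = None
    return filesize
```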
```diff
diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py
new file mode 100644
index 000000000..f784e106f
--- /dev/null
+++ b/searx/engines/pdbe.py
@@ -0,0 +1,109 @@
+"""
+ PDBe (Protein Data Bank in Europe)
+
+ @website      https://www.ebi.ac.uk/pdbe
+ @provide-api  yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
+               unlimited
+ @using-api    yes
+ @results      python dictionary (from json)
+ @stable       yes
+ @parse        url, title, content, img_src
+"""
+
+from json import loads
+from flask_babel import gettext
+
+categories = ['science']
+
+hide_obsolete = False
+
+# status codes of unpublished entries
+pdb_unpublished_codes = ['HPUB', 'HOLD', 'PROC', 'WAIT', 'AUTH', 'AUCO', 'REPL', 'POLC', 'REFI', 'TRSF', 'WDRN']
+# url for api query
+pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?'
+# base url for results
+pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}'
+# link to preview image of structure
+pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png'
+
+
+def request(query, params):
+
+    params['url'] = pdbe_solr_url
+    params['method'] = 'POST'
+    params['data'] = {
+        'q': query,
+        'wt': "json"  # request response in parsable format
+    }
+    return params
+
+
+def construct_body(result):
+    # set title
+    title = result['title']
+
+    # construct content body
+    content = """{title}<br />{authors} {journal} <strong>{volume}</strong> {page} ({year})"""
+
+    # replace placeholders with actual content
+    try:
+        if result['journal']:
+            content = content.format(
+                title=result['citation_title'],
+                authors=result['entry_author_list'][0],
+                journal=result['journal'],
+                volume=result['journal_volume'],
+                page=result['journal_page'],
+                year=result['citation_year'])
+        else:
+            content = content.format(
+                title=result['citation_title'],
+                authors=result['entry_author_list'][0],
+                journal='', volume='', page='',
+                year=result['release_year'])
+        img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
+    except (KeyError):
+        content = None
+        img_src = None
+
+    # construct url for preview image
+    try:
+        img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
+    except (KeyError):
+        img_src = None
+
+    return [title, content, img_src]
+
+
+def response(resp):
+
+    results = []
+    json = loads(resp.text)['response']['docs']
+
+    # parse results
+    for result in json:
+        # catch obsolete entries and mark them accordingly
+        if result['status'] in pdb_unpublished_codes:
+            continue
+        if hide_obsolete:
+            continue
+        if result['status'] == 'OBS':
+            # expand title to add some sort of warning message
+            title = gettext('{title} (OBSOLETE)').format(title=result['title'])
+            superseded_url = pdbe_entry_url.format(pdb_id=result['superseded_by'])
+
+            # since we can't construct a proper body from the response, we'll make up our own
+            msg_superseded = gettext("This entry has been superseded by")
+            content = '<em>{msg_superseded} <a href="{url}">{pdb_id}</a></em>'.format(
+                msg_superseded=msg_superseded,
+                url=superseded_url,
+                pdb_id=result['superseded_by'], )
+
+            # obsoleted entries don't have preview images
+            img_src = None
+        else:
+            title, content, img_src = construct_body(result)
+
+        results.append({
+            'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
+            'title': title,
+            'content': content,
+            'img_src': img_src
+        })
+
+    return results
```
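The pdbe engine is plain request/response glue over PDBe's public Solr endpoint, so it can be sanity-checked outside searx with a direct query. A minimal sketch follows; the query term is arbitrary, and individual documents are not guaranteed to carry every field, which is why the engine guards with `KeyError`:

```python
import requests
from json import loads

# same endpoint and parameters the engine's request() builds
resp = requests.post('https://www.ebi.ac.uk/pdbe/search/pdb/select?',
                     data={'q': 'hemoglobin', 'wt': 'json'})

# same unwrapping the engine's response() performs
for doc in loads(resp.text)['response']['docs']:
    # fields the engine reads when assembling a result entry
    print(doc.get('pdb_id'), doc.get('status'), doc.get('title'))
```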
```diff
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py
new file mode 100644
index 000000000..854ebba03
--- /dev/null
+++ b/searx/engines/seedpeer.py
@@ -0,0 +1,78 @@
+# Seedpeer (Videos, Music, Files)
+#
+# @website      http://seedpeer.eu
+# @provide-api  no (nothing found)
+#
+# @using-api    no
+# @results      HTML (using search portal)
+# @stable       yes (HTML can change)
+# @parse        url, title, content, seed, leech, magnetlink
+
+from urlparse import urljoin
+from cgi import escape
+from urllib import quote
+from lxml import html
+from operator import itemgetter
+from searx.engines.xpath import extract_text
+
+
+url = 'http://www.seedpeer.eu/'
+search_url = url + 'search/{search_term}/7/{page_no}.html'
+# specific xpath variables
+torrent_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a'
+alternative_torrent_xpath = '//*[@id="body"]/center/center/table[1]/tr/td/a'
+title_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a/text()'
+alternative_title_xpath = '//*[@id="body"]/center/center/table/tr/td/a'
+seeds_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[4]/font/text()'
+alternative_seeds_xpath = '//*[@id="body"]/center/center/table/tr/td[4]/font/text()'
+peers_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[5]/font/text()'
+alternative_peers_xpath = '//*[@id="body"]/center/center/table/tr/td[5]/font/text()'
+age_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[2]/text()'
+alternative_age_xpath = '//*[@id="body"]/center/center/table/tr/td[2]/text()'
+size_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[3]/text()'
+alternative_size_xpath = '//*[@id="body"]/center/center/table/tr/td[3]/text()'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(search_term=quote(query),
+                                      page_no=params['pageno'] - 1)
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+    dom = html.fromstring(resp.text)
+    torrent_links = dom.xpath(torrent_xpath)
+    if len(torrent_links) > 0:
+        seeds = dom.xpath(seeds_xpath)
+        peers = dom.xpath(peers_xpath)
+        titles = dom.xpath(title_xpath)
+        sizes = dom.xpath(size_xpath)
+        ages = dom.xpath(age_xpath)
+    else:  # under ~5 results uses a different xpath
+        torrent_links = dom.xpath(alternative_torrent_xpath)
+        seeds = dom.xpath(alternative_seeds_xpath)
+        peers = dom.xpath(alternative_peers_xpath)
+        titles = dom.xpath(alternative_title_xpath)
+        sizes = dom.xpath(alternative_size_xpath)
+        ages = dom.xpath(alternative_age_xpath)
+    # return empty array if nothing is found
+    if not torrent_links:
+        return []
+
+    # parse results
+    for index, result in enumerate(torrent_links):
+        link = result.attrib.get('href')
+        href = urljoin(url, link)
+        results.append({'url': href,
+                        'title': titles[index].text_content(),
+                        'content': '{}, {}'.format(sizes[index], ages[index]),
+                        'seed': seeds[index],
+                        'leech': peers[index],
+                        'template': 'torrent.html'})
+
+    # return results sorted by seeder
+    return sorted(results, key=itemgetter('seed'), reverse=True)
```
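Two details in the new seedpeer.py are worth flagging for a follow-up: `seeds` and `peers` come back from `text()` XPath expressions as strings, so the final `sorted(results, key=itemgetter('seed'), reverse=True)` orders them lexicographically ('9' ranks above '10'), and `title_xpath` likewise selects text nodes, on which lxml provides no `text_content()` method. A hypothetical fix for the sorting part, not included in this commit, would cast the counters when building each result:

```python
# hypothetical follow-up, not part of this commit: store the torrent
# counters as integers so the seed sort is numeric, not lexicographic
results.append({'url': href,
                'title': titles[index].text_content(),
                'content': '{}, {}'.format(sizes[index], ages[index]),
                'seed': int(seeds[index]) if seeds[index].isdigit() else 0,
                'leech': int(peers[index]) if peers[index].isdigit() else 0,
                'template': 'torrent.html'})
```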