| field | value | date |
|---|---|---|
| author | Alexandre Flament <alex@al-f.net> | 2016-10-22 14:25:50 +0200 |
| committer | GitHub <noreply@github.com> | 2016-10-22 14:25:50 +0200 |
| commit | a88768efd8ee6b832febda8508cb1ba3c8778b94 | |
| tree | a42f1078ce421a69edb7088c642461b6c05f7022 /searx/engines | |
| parent | f90eb428c679d3852d9738f6279d045283340562 | |
| parent | 85c7237a4f26cea523d5c3b8a863058e459ca07a | |
Merge branch 'master' into http1.1
Diffstat (limited to 'searx/engines')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | searx/engines/__init__.py | 12 |
| -rw-r--r-- | searx/engines/digbt.py | 2 |
| -rw-r--r-- | searx/engines/kickass.py | 46 |
| -rw-r--r-- | searx/engines/pdbe.py | 109 |
| -rw-r--r-- | searx/engines/seedpeer.py | 78 |
5 files changed, 208 insertions, 39 deletions
```diff
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 782b622b0..14376c31f 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -57,11 +57,17 @@ def load_module(filename):
 
 
 def load_engine(engine_data):
-    engine_name = engine_data['engine']
+
+    if '_' in engine_data['name']:
+        logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
+        sys.exit(1)
+
+    engine_module = engine_data['engine']
+
     try:
-        engine = load_module(engine_name + '.py')
+        engine = load_module(engine_module + '.py')
     except:
-        logger.exception('Cannot load engine "{}"'.format(engine_name))
+        logger.exception('Cannot load engine "{}"'.format(engine_module))
         return None
 
     for param_name in engine_data:
```

```diff
diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py
index c35327e8c..b55d7747a 100644
--- a/searx/engines/digbt.py
+++ b/searx/engines/digbt.py
@@ -40,7 +40,7 @@ def response(resp):
     results = list()
     for result in search_res:
         url = urljoin(URL, result.xpath('.//a[@title]/@href')[0])
-        title = result.xpath('.//a[@title]/text()')[0]
+        title = extract_text(result.xpath('.//a[@title]'))
         content = extract_text(result.xpath('.//div[@class="files"]'))
         files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
         filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
```

```diff
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index 4c5d24008..9cd8284da 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -16,13 +16,14 @@ from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size, convert_str_to_int
 
 # engine dependent config
 categories = ['videos', 'music', 'files']
 paging = True
 
 # search-url
-url = 'https://kickass.to/'
+url = 'https://kickass.cd/'
 search_url = url + 'search/{search_term}/{pageno}/'
 
 # specific xpath variables
@@ -57,41 +58,16 @@ def response(resp):
         href = urljoin(url, link.attrib['href'])
         title = extract_text(link)
         content = escape(extract_text(result.xpath(content_xpath)))
-        seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
-        leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
-        filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]
-        filesize_multiplier = result.xpath('.//td[contains(@class, "nobr")]//span/text()')[0]
-        files = result.xpath('.//td[contains(@class, "center")][2]/text()')[0]
-
-        # convert seed to int if possible
-        if seed.isdigit():
-            seed = int(seed)
-        else:
-            seed = 0
+        seed = extract_text(result.xpath('.//td[contains(@class, "green")]'))
+        leech = extract_text(result.xpath('.//td[contains(@class, "red")]'))
+        filesize_info = extract_text(result.xpath('.//td[contains(@class, "nobr")]'))
+        files = extract_text(result.xpath('.//td[contains(@class, "center")][2]'))
 
-        # convert leech to int if possible
-        if leech.isdigit():
-            leech = int(leech)
-        else:
-            leech = 0
-
-        # convert filesize to byte if possible
-        try:
-            filesize = float(filesize)
-
-            # convert filesize to byte
-            if filesize_multiplier == 'TB':
-                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
-            elif filesize_multiplier == 'GB':
-                filesize = int(filesize * 1024 * 1024 * 1024)
-            elif filesize_multiplier == 'MB':
-                filesize = int(filesize * 1024 * 1024)
-            elif filesize_multiplier == 'KB':
-                filesize = int(filesize * 1024)
-        except:
-            filesize = None
-
-        # convert files to int if possible
+        seed = convert_str_to_int(seed)
+        leech = convert_str_to_int(leech)
+
+        filesize, filesize_multiplier = filesize_info.split()
+        filesize = get_torrent_size(filesize, filesize_multiplier)
+
         if files.isdigit():
             files = int(files)
         else:
```
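The kickass change moves the seed/leech/filesize parsing into shared helpers imported from `searx.utils`. Their implementations are not part of this diff; the sketch below only approximates the behaviour the new code relies on, inferred from the inline logic the diff deletes, and is not a verbatim copy of `searx.utils`:

```python
# Approximate behaviour of the searx.utils helpers imported above,
# inferred from the inline logic this diff removes.

def convert_str_to_int(number_str):
    # best-effort conversion; non-numeric text (e.g. '--') becomes 0
    if number_str.isdigit():
        return int(number_str)
    return 0


def get_torrent_size(filesize, filesize_multiplier):
    # turn a ('1.5', 'GB')-style pair into a byte count, or None on failure
    try:
        filesize = float(filesize)
        if filesize_multiplier == 'TB':
            filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
        elif filesize_multiplier == 'GB':
            filesize = int(filesize * 1024 * 1024 * 1024)
        elif filesize_multiplier == 'MB':
            filesize = int(filesize * 1024 * 1024)
        elif filesize_multiplier == 'KB':
            filesize = int(filesize * 1024)
    except ValueError:
        filesize = None
    return filesize
```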
```diff
diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py
new file mode 100644
index 000000000..f784e106f
--- /dev/null
+++ b/searx/engines/pdbe.py
@@ -0,0 +1,109 @@
+"""
+ PDBe (Protein Data Bank in Europe)
+
+ @website      https://www.ebi.ac.uk/pdbe
+ @provide-api  yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
+               unlimited
+ @using-api    yes
+ @results      python dictionary (from json)
+ @stable       yes
+ @parse        url, title, content, img_src
+"""
+
+from json import loads
+from flask_babel import gettext
+
+categories = ['science']
+
+hide_obsolete = False
+
+# status codes of unpublished entries
+pdb_unpublished_codes = ['HPUB', 'HOLD', 'PROC', 'WAIT', 'AUTH', 'AUCO', 'REPL', 'POLC', 'REFI', 'TRSF', 'WDRN']
+# url for api query
+pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?'
+# base url for results
+pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}'
+# link to preview image of structure
+pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png'
+
+
+def request(query, params):
+
+    params['url'] = pdbe_solr_url
+    params['method'] = 'POST'
+    params['data'] = {
+        'q': query,
+        'wt': "json"  # request response in parsable format
+    }
+    return params
+
+
+def construct_body(result):
+    # set title
+    title = result['title']
+
+    # construct content body
+    content = """{title}<br />{authors} {journal} <strong>{volume}</strong> {page} ({year})"""
+
+    # replace placeholders with actual content
+    try:
+        if result['journal']:
+            content = content.format(
+                title=result['citation_title'],
+                authors=result['entry_author_list'][0],
+                journal=result['journal'],
+                volume=result['journal_volume'],
+                page=result['journal_page'],
+                year=result['citation_year'])
+        else:
+            content = content.format(
+                title=result['citation_title'],
+                authors=result['entry_author_list'][0],
+                journal='', volume='', page='',
+                year=result['release_year'])
+        img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
+    except (KeyError):
+        content = None
+        img_src = None
+
+    # construct url for preview image
+    try:
+        img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
+    except (KeyError):
+        img_src = None
+
+    return [title, content, img_src]
+
+
+def response(resp):
+
+    results = []
+    json = loads(resp.text)['response']['docs']
+
+    # parse results
+    for result in json:
+        # catch obsolete entries and mark them accordingly
+        if result['status'] in pdb_unpublished_codes:
+            continue
+        if hide_obsolete:
+            continue
+        if result['status'] == 'OBS':
+            # expand title to add some sort of warning message
+            title = gettext('{title} (OBSOLETE)').format(title=result['title'])
+            superseded_url = pdbe_entry_url.format(pdb_id=result['superseded_by'])
+
+            # since we can't construct a proper body from the response, we'll make up our own
+            msg_superseded = gettext("This entry has been superseded by")
+            content = '<em>{msg_superseded} <a href="{url}">{pdb_id}</a></em>'.format(
+                msg_superseded=msg_superseded,
+                url=superseded_url,
+                pdb_id=result['superseded_by'], )
+
+            # obsoleted entries don't have preview images
+            img_src = None
+        else:
+            title, content, img_src = construct_body(result)
+
+        results.append({
+            'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
+            'title': title,
+            'content': content,
+            'img_src': img_src
+        })
+
+    return results
```
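The pdbe engine is plain request/response glue over PDBe's public Solr endpoint, so it can be sanity-checked outside searx with a direct query. A minimal sketch follows; the query term is arbitrary, and individual documents are not guaranteed to carry every field, which is why the engine guards with `KeyError`:

```python
import requests
from json import loads

# same endpoint and parameters the engine's request() builds
resp = requests.post('https://www.ebi.ac.uk/pdbe/search/pdb/select?',
                     data={'q': 'hemoglobin', 'wt': 'json'})

# same unwrapping the engine's response() performs
for doc in loads(resp.text)['response']['docs']:
    # fields the engine reads when assembling a result entry
    print(doc.get('pdb_id'), doc.get('status'), doc.get('title'))
```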
```diff
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py
new file mode 100644
index 000000000..854ebba03
--- /dev/null
+++ b/searx/engines/seedpeer.py
@@ -0,0 +1,78 @@
+# Seedpeer (Videos, Music, Files)
+#
+# @website      http://seedpeer.eu
+# @provide-api  no (nothing found)
+#
+# @using-api    no
+# @results      HTML (using search portal)
+# @stable       yes (HTML can change)
+# @parse        url, title, content, seed, leech, magnetlink
+
+from urlparse import urljoin
+from cgi import escape
+from urllib import quote
+from lxml import html
+from operator import itemgetter
+from searx.engines.xpath import extract_text
+
+
+url = 'http://www.seedpeer.eu/'
+search_url = url + 'search/{search_term}/7/{page_no}.html'
+# specific xpath variables
+torrent_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a'
+alternative_torrent_xpath = '//*[@id="body"]/center/center/table[1]/tr/td/a'
+title_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a/text()'
+alternative_title_xpath = '//*[@id="body"]/center/center/table/tr/td/a'
+seeds_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[4]/font/text()'
+alternative_seeds_xpath = '//*[@id="body"]/center/center/table/tr/td[4]/font/text()'
+peers_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[5]/font/text()'
+alternative_peers_xpath = '//*[@id="body"]/center/center/table/tr/td[5]/font/text()'
+age_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[2]/text()'
+alternative_age_xpath = '//*[@id="body"]/center/center/table/tr/td[2]/text()'
+size_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[3]/text()'
+alternative_size_xpath = '//*[@id="body"]/center/center/table/tr/td[3]/text()'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(search_term=quote(query),
+                                      page_no=params['pageno'] - 1)
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+    dom = html.fromstring(resp.text)
+    torrent_links = dom.xpath(torrent_xpath)
+    if len(torrent_links) > 0:
+        seeds = dom.xpath(seeds_xpath)
+        peers = dom.xpath(peers_xpath)
+        titles = dom.xpath(title_xpath)
+        sizes = dom.xpath(size_xpath)
+        ages = dom.xpath(age_xpath)
+    else:  # under ~5 results uses a different xpath
+        torrent_links = dom.xpath(alternative_torrent_xpath)
+        seeds = dom.xpath(alternative_seeds_xpath)
+        peers = dom.xpath(alternative_peers_xpath)
+        titles = dom.xpath(alternative_title_xpath)
+        sizes = dom.xpath(alternative_size_xpath)
+        ages = dom.xpath(alternative_age_xpath)
+    # return empty array if nothing is found
+    if not torrent_links:
+        return []
+
+    # parse results
+    for index, result in enumerate(torrent_links):
+        link = result.attrib.get('href')
+        href = urljoin(url, link)
+        results.append({'url': href,
+                        'title': titles[index].text_content(),
+                        'content': '{}, {}'.format(sizes[index], ages[index]),
+                        'seed': seeds[index],
+                        'leech': peers[index],
+                        'template': 'torrent.html'})
+
+    # return results sorted by seeder
+    return sorted(results, key=itemgetter('seed'), reverse=True)
```
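Two details in the new seedpeer.py are worth flagging for a follow-up: `seeds` and `peers` come back from `text()` XPath expressions as strings, so the final `sorted(results, key=itemgetter('seed'), reverse=True)` orders them lexicographically ('9' ranks above '10'), and `title_xpath` likewise selects text nodes, on which lxml provides no `text_content()` method. A hypothetical fix for the sorting part, not included in this commit, would cast the counters when building each result:

```python
# hypothetical follow-up, not part of this commit: store the torrent
# counters as integers so the seed sort is numeric, not lexicographic
results.append({'url': href,
                'title': titles[index].text_content(),
                'content': '{}, {}'.format(sizes[index], ages[index]),
                'seed': int(seeds[index]) if seeds[index].isdigit() else 0,
                'leech': int(peers[index]) if peers[index].isdigit() else 0,
                'template': 'torrent.html'})
```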