Merge pull request #677 from pydo/feature/seedpeer-engine-integration

Feature/seedpeer engine integration
author: Adam Tauber <asciimoo@gmail.com> 2016-10-03 13:21:12 +0200
committer: GitHub <noreply@github.com> 2016-10-03 13:21:12 +0200
commit: e7d005c621a0ce08d3fcdfc6122d27c809a5dff3 (patch)
tree: b2ba6806f5a78fc2934e980fc864466b6f758e4e /searx
parent: dceb9031140a12972d000849ea8819a6d383739a (diff)
parent: beff8c6007c364de3e42b4aa5114ce9db5e544cd (diff)
2 files changed, 84 insertions, 0 deletions
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py
new file mode 100644
index 000000000..854ebba03
--- /dev/null
+++ b/searx/engines/seedpeer.py
@@ -0,0 +1,78 @@
+#  Seedpeer (Videos, Music, Files)
+#
+# @website     http://seedpeer.eu
+# @provide-api no (nothing found)
+#
+# @using-api   no
+# @results     HTML (using search portal)
+# @stable      yes (HTML can change)
+# @parse       url, title, content, seed, leech, magnetlink
+
+from urlparse import urljoin
+from cgi import escape
+from urllib import quote
+from lxml import html
+from operator import itemgetter
+from searx.engines.xpath import extract_text
+
+
+# base URL of the site; response() also uses it to resolve result links
+url = 'http://www.seedpeer.eu/'
+# search URL template; request() fills in the quoted query and a page number
+search_url = url + 'search/{search_term}/7/{page_no}.html'
+# specific xpath variables
+# Every selector has an "alternative_" twin that response() falls back to
+# when torrent_xpath matches nothing.
+# NOTE(review): title_xpath selects text() nodes, whereas
+# alternative_title_xpath selects the <a> elements themselves.
+torrent_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a'
+alternative_torrent_xpath = '//*[@id="body"]/center/center/table[1]/tr/td/a'
+title_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a/text()'
+alternative_title_xpath = '//*[@id="body"]/center/center/table/tr/td/a'
+seeds_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[4]/font/text()'
+alternative_seeds_xpath = '//*[@id="body"]/center/center/table/tr/td[4]/font/text()'
+peers_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[5]/font/text()'
+alternative_peers_xpath = '//*[@id="body"]/center/center/table/tr/td[5]/font/text()'
+age_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[2]/text()'
+alternative_age_xpath = '//*[@id="body"]/center/center/table/tr/td[2]/text()'
+size_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[3]/text()'
+alternative_size_xpath = '//*[@id="body"]/center/center/table/tr/td[3]/text()'
+
+
+# do search-request
+def request(query, params):
+    """Store the search URL for *query* in ``params['url']``.
+
+    The query is URL-quoted and the page number placed in the URL is
+    ``params['pageno'] - 1``.  The same *params* dict is returned.
+    """
+    encoded_query = quote(query)
+    page_index = params['pageno'] - 1
+    params['url'] = search_url.format(search_term=encoded_query,
+                                      page_no=page_index)
+    return params
+
+
+# get response from search-request
+def response(resp):
+    """Parse a Seedpeer search result page into a list of torrent results.
+
+    :param resp: search response; only ``resp.text`` (the HTML) is read
+    :returns: list of result dicts with the keys ``url``, ``title``,
+              ``content``, ``seed``, ``leech`` and ``template``, ordered by
+              numeric seeder count (highest first); an empty list when no
+              torrent link is found
+    """
+    dom = html.fromstring(resp.text)
+    torrent_links = dom.xpath(torrent_xpath)
+    if torrent_links:
+        seeds = dom.xpath(seeds_xpath)
+        peers = dom.xpath(peers_xpath)
+        titles = dom.xpath(title_xpath)
+        sizes = dom.xpath(size_xpath)
+        ages = dom.xpath(age_xpath)
+    else:  # under ~5 results uses a different xpath
+        torrent_links = dom.xpath(alternative_torrent_xpath)
+        seeds = dom.xpath(alternative_seeds_xpath)
+        peers = dom.xpath(alternative_peers_xpath)
+        titles = dom.xpath(alternative_title_xpath)
+        sizes = dom.xpath(alternative_size_xpath)
+        ages = dom.xpath(alternative_age_xpath)
+    # return empty array if nothing is found
+    if not torrent_links:
+        return []
+
+    # parse results
+    results = []
+    # zip() stops at the shortest column, so a page where one column has
+    # fewer matches than there are links no longer raises IndexError
+    rows = zip(torrent_links, titles, sizes, ages, seeds, peers)
+    for link, title, size, age, seed, peer in rows:
+        # title_xpath yields plain strings (text() nodes) which have no
+        # text_content(); alternative_title_xpath yields elements
+        if hasattr(title, 'text_content'):
+            title = title.text_content()
+        results.append({'url': urljoin(url, link.attrib.get('href')),
+                        'title': title,
+                        'content': '{}, {}'.format(size, age),
+                        'seed': seed,
+                        'leech': peer,
+                        'template': 'torrent.html'})
+
+    # return results sorted by seeder (numerically, not as text)
+    return sorted(results, key=_seed_count, reverse=True)
+
+
+def _seed_count(result):
+    """Return the seeder count of *result* as an int, or 0 if not numeric."""
+    try:
+        return int(result['seed'])
+    except (TypeError, ValueError):
+        return 0
diff --git a/searx/settings.yml b/searx/settings.yml
index 72bc7ed0f..f6848a24e 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -495,6 +495,12 @@ engines:
     timeout: 6.0
     categories : science
 
+  - name : seedpeer
+    engine : seedpeer
+    shortcut: speu
+    categories: files, music, videos
+    disabled: True
+
   - name : dictzone
     engine : dictzone
     shortcut : dc
author	Adam Tauber <asciimoo@gmail.com>	2016-10-03 13:21:12 +0200
committer	GitHub <noreply@github.com>	2016-10-03 13:21:12 +0200
commit	e7d005c621a0ce08d3fcdfc6122d27c809a5dff3 (patch)
tree	b2ba6806f5a78fc2934e980fc864466b6f758e4e /searx
parent	dceb9031140a12972d000849ea8819a6d383739a (diff)
parent	beff8c6007c364de3e42b4aa5114ce9db5e544cd (diff)