diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2016-10-03 13:21:12 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2016-10-03 13:21:12 +0200 |
| commit | e7d005c621a0ce08d3fcdfc6122d27c809a5dff3 (patch) | |
| tree | b2ba6806f5a78fc2934e980fc864466b6f758e4e /searx | |
| parent | dceb9031140a12972d000849ea8819a6d383739a (diff) | |
| parent | beff8c6007c364de3e42b4aa5114ce9db5e544cd (diff) | |
Merge pull request #677 from pydo/feature/seedpeer-engine-integration
Feature/seedpeer engine integration
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/engines/seedpeer.py | 78 | ||||
| -rw-r--r-- | searx/settings.yml | 6 |
2 files changed, 84 insertions, 0 deletions
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py new file mode 100644 index 000000000..854ebba03 --- /dev/null +++ b/searx/engines/seedpeer.py @@ -0,0 +1,78 @@ +# Seedpeer (Videos, Music, Files) +# +# @website http://seedpeer.eu +# @provide-api no (nothing found) +# +# @using-api no +# @results HTML (using search portal) +# @stable yes (HTML can change) +# @parse url, title, content, seed, leech, magnetlink + +from urlparse import urljoin +from cgi import escape +from urllib import quote +from lxml import html +from operator import itemgetter +from searx.engines.xpath import extract_text + + +url = 'http://www.seedpeer.eu/' +search_url = url + 'search/{search_term}/7/{page_no}.html' +# specific xpath variables +torrent_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a' +alternative_torrent_xpath = '//*[@id="body"]/center/center/table[1]/tr/td/a' +title_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a/text()' +alternative_title_xpath = '//*[@id="body"]/center/center/table/tr/td/a' +seeds_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[4]/font/text()' +alternative_seeds_xpath = '//*[@id="body"]/center/center/table/tr/td[4]/font/text()' +peers_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[5]/font/text()' +alternative_peers_xpath = '//*[@id="body"]/center/center/table/tr/td[5]/font/text()' +age_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[2]/text()' +alternative_age_xpath = '//*[@id="body"]/center/center/table/tr/td[2]/text()' +size_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[3]/text()' +alternative_size_xpath = '//*[@id="body"]/center/center/table/tr/td[3]/text()' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(search_term=quote(query), + page_no=params['pageno'] - 1) + return params + + +# get response from search-request +def response(resp): + results = [] + dom = html.fromstring(resp.text) + torrent_links = dom.xpath(torrent_xpath) + if len(torrent_links) > 0: + seeds = dom.xpath(seeds_xpath) + peers = dom.xpath(peers_xpath) + titles = dom.xpath(title_xpath) + sizes = dom.xpath(size_xpath) + ages = dom.xpath(age_xpath) + else: # under ~5 results uses a different xpath + torrent_links = dom.xpath(alternative_torrent_xpath) + seeds = dom.xpath(alternative_seeds_xpath) + peers = dom.xpath(alternative_peers_xpath) + titles = dom.xpath(alternative_title_xpath) + sizes = dom.xpath(alternative_size_xpath) + ages = dom.xpath(alternative_age_xpath) + # return empty array if nothing is found + if not torrent_links: + return [] + + # parse results + for index, result in enumerate(torrent_links): + link = result.attrib.get('href') + href = urljoin(url, link) + results.append({'url': href, + 'title': titles[index].text_content(), + 'content': '{}, {}'.format(sizes[index], ages[index]), + 'seed': seeds[index], + 'leech': peers[index], + + 'template': 'torrent.html'}) + + # return results sorted by seeder + return sorted(results, key=itemgetter('seed'), reverse=True) diff --git a/searx/settings.yml b/searx/settings.yml index 72bc7ed0f..f6848a24e 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -495,6 +495,12 @@ engines: timeout: 6.0 categories : science + - name : seedpeer + engine : seedpeer + shortcut: speu + categories: files, music, videos + disabled: True + - name : dictzone engine : dictzone shortcut : dc |