diff options
| author | Alexandre Flament <alex@al-f.net> | 2017-01-15 22:18:20 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-01-15 22:18:20 +0100 |
| commit | 7a16aca346c14994f304a6961c55b63ab260265a (patch) | |
| tree | 4cd44fc2fe23d17d1cb574042c98a276f04253c9 /searx/engines | |
| parent | 627962ce40cd66d84f1be2d88907496fbd40b902 (diff) | |
| parent | 1a9f8240b851c64a10be7b8990b6f3926ca506b3 (diff) | |
Merge branch 'master' into flask_perimeter
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/1337x.py | 40 | ||||
| -rw-r--r-- | searx/engines/google.py | 4 | ||||
| -rw-r--r-- | searx/engines/google_news.py | 13 |
3 files changed, 52 insertions, 5 deletions
diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py
new file mode 100644
index 000000000..c6bc3cb6d
--- /dev/null
+++ b/searx/engines/1337x.py
@@ -0,0 +1,40 @@
+from urllib import quote
+from lxml import html
+from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size
+from urlparse import urljoin
+
+url = 'https://1337x.to/'
+search_url = url + 'search/{search_term}/{pageno}/'
+categories = ['videos']
+paging = True
+
+
+def request(query, params):
+    params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'])
+
+    return params
+
+
+def response(resp):
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    for result in dom.xpath('//table[contains(@class, "table-list")]/tbody//tr'):
+        href = urljoin(url, result.xpath('./td[contains(@class, "name")]/a[2]/@href')[0])
+        title = extract_text(result.xpath('./td[contains(@class, "name")]/a[2]'))
+        seed = extract_text(result.xpath('.//td[contains(@class, "seeds")]'))
+        leech = extract_text(result.xpath('.//td[contains(@class, "leeches")]'))
+        filesize_info = extract_text(result.xpath('.//td[contains(@class, "size")]/text()'))
+        filesize, filesize_multiplier = filesize_info.split()
+        filesize = get_torrent_size(filesize, filesize_multiplier)
+
+        results.append({'url': href,
+                        'title': title,
+                        'seed': seed,
+                        'leech': leech,
+                        'filesize': filesize,
+                        'template': 'torrent.html'})
+
+    return results
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 803cd307e..2fa638d73 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -217,6 +217,10 @@ def response(resp):
     # convert the text to dom
     dom = html.fromstring(resp.text)
 
+    instant_answer = dom.xpath('//div[@id="_vBb"]//text()')
+    if instant_answer:
+        results.append({'answer': u' '.join(instant_answer)})
+
     # parse results
     for result in dom.xpath(results_xpath):
         try:
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 49c6a5d50..6b79ff5c8 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -66,11 +66,14 @@ def response(resp):
 
     # parse results
     for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
-        r = {
-            'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
-            'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
-            'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
-        }
+        try:
+            r = {
+                'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
+                'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
+                'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
+            }
+        except:
+            continue
 
         imgs = result.xpath('.//img/@src')
         if len(imgs) and not imgs[0].startswith('data'):