diff options
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/engines/btdigg.py | 16 | ||||
| -rw-r--r-- | searx/engines/digbt.py | 58 | ||||
| -rw-r--r-- | searx/engines/json_engine.py | 2 | ||||
| -rw-r--r-- | searx/engines/xpath.py | 2 | ||||
| -rw-r--r-- | searx/results.py | 2 | ||||
| -rw-r--r-- | searx/settings.yml | 20 | ||||
| -rw-r--r-- | searx/utils.py | 18 |
7 files changed, 100 insertions, 18 deletions
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index c2b22f003..ea6baf1c8 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -16,6 +16,7 @@ from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text +from searx.utils import get_torrent_size # engine dependent config categories = ['videos', 'music', 'files'] @@ -68,20 +69,7 @@ def response(resp): leech = 0 # convert filesize to byte if possible - try: - filesize = float(filesize) - - # convert filesize to byte - if filesize_multiplier == 'TB': - filesize = int(filesize * 1024 * 1024 * 1024 * 1024) - elif filesize_multiplier == 'GB': - filesize = int(filesize * 1024 * 1024 * 1024) - elif filesize_multiplier == 'MB': - filesize = int(filesize * 1024 * 1024) - elif filesize_multiplier == 'KB': - filesize = int(filesize * 1024) - except: - filesize = None + filesize = get_torrent_size(filesize, filesize_multiplier) # convert files to int if possible if files.isdigit(): diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py new file mode 100644 index 000000000..c35327e8c --- /dev/null +++ b/searx/engines/digbt.py @@ -0,0 +1,58 @@ +""" + DigBT (Videos, Music, Files) + + @website https://digbt.org + @provide-api no + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, magnetlink +""" + +from urlparse import urljoin +from lxml import html +from searx.engines.xpath import extract_text +from searx.utils import get_torrent_size + +categories = ['videos', 'music', 'files'] +paging = True + +URL = 'https://digbt.org' +SEARCH_URL = URL + '/search/{query}-time-{pageno}' +FILESIZE = 3 +FILESIZE_MULTIPLIER = 4 + + +def request(query, params): + params['url'] = SEARCH_URL.format(query=query, pageno=params['pageno']) + + return params + + +def response(resp): + dom = html.fromstring(resp.content) + search_res = dom.xpath('.//td[@class="x-item"]') + + if not search_res: + return list() + + results = list() + for result in search_res: + url = urljoin(URL, result.xpath('.//a[@title]/@href')[0]) + title = result.xpath('.//a[@title]/text()')[0] + content = extract_text(result.xpath('.//div[@class="files"]')) + files_data = extract_text(result.xpath('.//div[@class="tail"]')).split() + filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) + magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] + + results.append({'url': url, + 'title': title, + 'content': content, + 'filesize': filesize, + 'magnetlink': magnetlink, + 'seed': 'N/A', + 'leech': 'N/A', + 'template': 'torrent.html'}) + + return results diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index a824c38e5..4604c3cac 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -81,7 +81,7 @@ def request(query, params): fp = {'query': query} if paging and search_url.find('{pageno}') >= 0: - fp['pageno'] = (params['pageno'] + first_page_num - 1) * page_size + fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num params['url'] = search_url.format(**fp) params['query'] = query diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index e701c02bf..e5c0c5bea 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -87,7 +87,7 @@ def request(query, params): fp = {'query': query} if paging and search_url.find('{pageno}') >= 0: - fp['pageno'] = (params['pageno'] + first_page_num - 1) * page_size + fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num params['url'] = search_url.format(**fp) params['query'] = query diff --git a/searx/results.py b/searx/results.py index 9a4ec0b28..32832f199 100644 --- a/searx/results.py +++ b/searx/results.py @@ -28,7 +28,7 @@ def compare_urls(url_a, url_b): else: host_b = url_b.netloc - if host_a != host_b or url_a.query != url_b.query: + if host_a != host_b or url_a.query != url_b.query or url_a.fragment != url_b.fragment: return False # remove / from the end of the url if required diff --git a/searx/settings.yml b/searx/settings.yml index f0d33e50e..905f07472 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -87,7 +87,7 @@ engines: - name : btdigg engine : btdigg shortcut : bt - + - name : crossref engine : json_engine paging : True @@ -118,6 +118,12 @@ engines: weight : 2 disabled : True + - name : digbt + engine : digbt + shortcut : dbt + timeout : 6.0 + disabled : True + - name : digg engine : digg shortcut : dg @@ -281,6 +287,18 @@ engines: disabled : True shortcut : habr + - name : hoogle + engine : json_engine + paging : True + search_url : https://www.haskell.org/hoogle/?mode=json&hoogle={query}&start={pageno} + results_query : results + url_query : location + title_query : self + content_query : docs + page_size : 20 + categories : it + shortcut : ho + - name : ina engine : ina shortcut : in diff --git a/searx/utils.py b/searx/utils.py index aa8ce92a1..744142e36 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -237,3 +237,21 @@ def list_get(a_list, index, default=None): return a_list[index] else: return default + + +def get_torrent_size(filesize, filesize_multiplier): + try: + filesize = float(filesize) + + if filesize_multiplier == 'TB': + filesize = int(filesize * 1024 * 1024 * 1024 * 1024) + elif filesize_multiplier == 'GB': + filesize = int(filesize * 1024 * 1024 * 1024) + elif filesize_multiplier == 'MB': + filesize = int(filesize * 1024 * 1024) + elif filesize_multiplier == 'KB': + filesize = int(filesize * 1024) + except: + filesize = None + + return filesize |