diff options
| -rw-r--r-- | searx/engines/btdigg.py | 16 | ||||
| -rw-r--r-- | searx/engines/digbt.py | 58 | ||||
| -rw-r--r-- | searx/engines/json_engine.py | 2 | ||||
| -rw-r--r-- | searx/engines/xpath.py | 2 | ||||
| -rw-r--r-- | searx/results.py | 2 | ||||
| -rw-r--r-- | searx/settings.yml | 30 | ||||
| -rw-r--r-- | searx/templates/courgette/about.html | 12 | ||||
| -rw-r--r-- | searx/templates/courgette/base.html | 2 | ||||
| -rw-r--r-- | searx/templates/default/about.html | 12 | ||||
| -rw-r--r-- | searx/templates/default/base.html | 2 | ||||
| -rw-r--r-- | searx/templates/oscar/about.html | 12 | ||||
| -rw-r--r-- | searx/templates/oscar/base.html | 2 | ||||
| -rw-r--r-- | searx/templates/pix-art/about.html | 12 | ||||
| -rw-r--r-- | searx/templates/pix-art/base.html | 2 | ||||
| -rw-r--r-- | searx/templates/pix-art/index.html | 2 | ||||
| -rw-r--r-- | searx/templates/pix-art/results.html | 2 | ||||
| -rw-r--r-- | searx/utils.py | 18 | ||||
| -rw-r--r-- | tests/robot/test_basic.robot | 2 | ||||
| -rw-r--r-- | tests/unit/engines/test_digbt.py | 59 |
19 files changed, 191 insertions, 58 deletions
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index c2b22f003..ea6baf1c8 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -16,6 +16,7 @@ from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text +from searx.utils import get_torrent_size # engine dependent config categories = ['videos', 'music', 'files'] @@ -68,20 +69,7 @@ def response(resp): leech = 0 # convert filesize to byte if possible - try: - filesize = float(filesize) - - # convert filesize to byte - if filesize_multiplier == 'TB': - filesize = int(filesize * 1024 * 1024 * 1024 * 1024) - elif filesize_multiplier == 'GB': - filesize = int(filesize * 1024 * 1024 * 1024) - elif filesize_multiplier == 'MB': - filesize = int(filesize * 1024 * 1024) - elif filesize_multiplier == 'KB': - filesize = int(filesize * 1024) - except: - filesize = None + filesize = get_torrent_size(filesize, filesize_multiplier) # convert files to int if possible if files.isdigit(): diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py new file mode 100644 index 000000000..c35327e8c --- /dev/null +++ b/searx/engines/digbt.py @@ -0,0 +1,58 @@ +""" + DigBT (Videos, Music, Files) + + @website https://digbt.org + @provide-api no + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, magnetlink +""" + +from urlparse import urljoin +from lxml import html +from searx.engines.xpath import extract_text +from searx.utils import get_torrent_size + +categories = ['videos', 'music', 'files'] +paging = True + +URL = 'https://digbt.org' +SEARCH_URL = URL + '/search/{query}-time-{pageno}' +FILESIZE = 3 +FILESIZE_MULTIPLIER = 4 + + +def request(query, params): + params['url'] = SEARCH_URL.format(query=query, pageno=params['pageno']) + + return params + + +def response(resp): + dom = html.fromstring(resp.content) + search_res = dom.xpath('.//td[@class="x-item"]') + + if not search_res: + return list() + + results = list() + for result in search_res: + url = urljoin(URL, result.xpath('.//a[@title]/@href')[0]) + title = result.xpath('.//a[@title]/text()')[0] + content = extract_text(result.xpath('.//div[@class="files"]')) + files_data = extract_text(result.xpath('.//div[@class="tail"]')).split() + filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) + magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] + + results.append({'url': url, + 'title': title, + 'content': content, + 'filesize': filesize, + 'magnetlink': magnetlink, + 'seed': 'N/A', + 'leech': 'N/A', + 'template': 'torrent.html'}) + + return results diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index a824c38e5..4604c3cac 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -81,7 +81,7 @@ def request(query, params): fp = {'query': query} if paging and search_url.find('{pageno}') >= 0: - fp['pageno'] = (params['pageno'] + first_page_num - 1) * page_size + fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num params['url'] = search_url.format(**fp) params['query'] = query diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index e701c02bf..e5c0c5bea 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -87,7 +87,7 @@ def request(query, params): fp = {'query': query} if paging and search_url.find('{pageno}') >= 0: - fp['pageno'] = (params['pageno'] + first_page_num - 1) * page_size + fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num params['url'] = search_url.format(**fp) params['query'] = query diff --git a/searx/results.py b/searx/results.py index 9a4ec0b28..32832f199 100644 --- a/searx/results.py +++ b/searx/results.py @@ -28,7 +28,7 @@ def compare_urls(url_a, url_b): else: host_b = url_b.netloc - if host_a != host_b or url_a.query != url_b.query: + if host_a != host_b or url_a.query != url_b.query or url_a.fragment != url_b.fragment: return False # remove / from the end of the url if required diff --git a/searx/settings.yml b/searx/settings.yml index f0d33e50e..2c034a863 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -84,10 +84,6 @@ engines: disabled : True shortcut : bb - - name : btdigg - engine : btdigg - shortcut : bt - - name : crossref engine : json_engine paging : True @@ -118,6 +114,12 @@ engines: weight : 2 disabled : True + - name : digbt + engine : digbt + shortcut : dbt + timeout : 6.0 + disabled : True + - name : digg engine : digg shortcut : dg @@ -281,6 +283,18 @@ engines: disabled : True shortcut : habr + - name : hoogle + engine : json_engine + paging : True + search_url : https://www.haskell.org/hoogle/?mode=json&hoogle={query}&start={pageno} + results_query : results + url_query : location + title_query : self + content_query : docs + page_size : 20 + categories : it + shortcut : ho + - name : ina engine : ina shortcut : in @@ -332,7 +346,8 @@ engines: - name : piratebay engine : piratebay shortcut : tpb - disabled : True + url: https://pirateproxy.red/ + timeout : 3.0 - name : qwant engine : qwant @@ -420,11 +435,6 @@ engines: timeout : 6.0 disabled : True - - name : torrentz - engine : torrentz - timeout : 5.0 - shortcut : to - - name : twitter engine : twitter shortcut : tw diff --git a/searx/templates/courgette/about.html b/searx/templates/courgette/about.html index 2945e1f7b..3855d4682 100644 --- a/searx/templates/courgette/about.html +++ b/searx/templates/courgette/about.html @@ -6,20 +6,20 @@ <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>, aggregating the results of other <a href="{{ url_for('preferences') }}">search engines</a> while not storing information about its users. </p> - <h2>Why use Searx?</h2> + <h2>Why use searx?</h2> <ul> - <li>Searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li> - <li>Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li> - <li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li> + <li>searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li> + <li>searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li> + <li>searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li> </ul> <p>If you do care about privacy, want to be a conscious user, or otherwise believe - in digital freedom, make Searx your default search engine or run it on your own server</p> + in digital freedom, make searx your default search engine or run it on your own server</p> <h2>Technical details - How does it work?</h2> <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>, inspired by the <a href="http://seeks-project.info/">seeks project</a>.<br /> -It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.<br /> +It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, searx uses the search bar to perform GET requests.<br /> Searx can be added to your browser's search bar; moreover, it can be set as the default search engine. </p> diff --git a/searx/templates/courgette/base.html b/searx/templates/courgette/base.html index 276fae870..b2c70a3b7 100644 --- a/searx/templates/courgette/base.html +++ b/searx/templates/courgette/base.html @@ -2,7 +2,7 @@ <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"{% if rtl %} dir="rtl"{% endif %}> <head> <meta charset="UTF-8" /> - <meta name="description" content="Searx - a privacy-respecting, hackable metasearch engine" /> + <meta name="description" content="searx - a privacy-respecting, hackable metasearch engine" /> <meta name="keywords" content="searx, search, search engine, metasearch, meta search" /> <meta name="generator" content="searx/{{ searx_version }}"> <meta name="referrer" content="no-referrer"> diff --git a/searx/templates/default/about.html b/searx/templates/default/about.html index 1b5fc34c0..f21a6f296 100644 --- a/searx/templates/default/about.html +++ b/searx/templates/default/about.html @@ -6,20 +6,20 @@ <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>, aggregating the results of other <a href="{{ url_for('preferences') }}">search engines</a> while not storing information about its users. </p> - <h2>Why use Searx?</h2> + <h2>Why use searx?</h2> <ul> - <li>Searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li> - <li>Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li> - <li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li> + <li>searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li> + <li>searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li> + <li>searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li> </ul> <p>If you do care about privacy, want to be a conscious user, or otherwise believe - in digital freedom, make Searx your default search engine or run it on your own server</p> + in digital freedom, make searx your default search engine or run it on your own server</p> <h2>Technical details - How does it work?</h2> <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>, inspired by the <a href="http://seeks-project.info/">seeks project</a>.<br /> -It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, if Searx used from the search bar it performs GET requests.<br /> +It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, if searx used from the search bar it performs GET requests.<br /> Searx can be added to your browser's search bar; moreover, it can be set as the default search engine. </p> diff --git a/searx/templates/default/base.html b/searx/templates/default/base.html index 143bdb8d2..a2c38fef7 100644 --- a/searx/templates/default/base.html +++ b/searx/templates/default/base.html @@ -2,7 +2,7 @@ <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"{% if rtl %} dir="rtl"{% endif %}> <head> <meta charset="UTF-8" /> - <meta name="description" content="Searx - a privacy-respecting, hackable metasearch engine" /> + <meta name="description" content="searx - a privacy-respecting, hackable metasearch engine" /> <meta name="keywords" content="searx, search, search engine, metasearch, meta search" /> <meta name="generator" content="searx/{{ searx_version }}"> <meta name="referrer" content="no-referrer"> diff --git a/searx/templates/oscar/about.html b/searx/templates/oscar/about.html index 39ef3663e..d42b783c7 100644 --- a/searx/templates/oscar/about.html +++ b/searx/templates/oscar/about.html @@ -7,20 +7,20 @@ <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>, aggregating the results of other <a href="{{ url_for('preferences') }}">search engines</a> while not storing information about its users. </p> - <h2>Why use Searx?</h2> + <h2>Why use searx?</h2> <ul> - <li>Searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li> - <li>Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li> - <li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li> + <li>searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li> + <li>searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li> + <li>searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li> </ul> <p>If you do care about privacy, want to be a conscious user, or otherwise believe - in digital freedom, make Searx your default search engine or run it on your own server</p> + in digital freedom, make searx your default search engine or run it on your own server</p> <h2>Technical details - How does it work?</h2> <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>, inspired by the <a href="http://seeks-project.info/">seeks project</a>.<br /> -It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.<br /> +It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, searx uses the search bar to perform GET requests.<br /> Searx can be added to your browser's search bar; moreover, it can be set as the default search engine. </p> diff --git a/searx/templates/oscar/base.html b/searx/templates/oscar/base.html index a1f1c1a90..d3170d622 100644 --- a/searx/templates/oscar/base.html +++ b/searx/templates/oscar/base.html @@ -2,7 +2,7 @@ <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"{% if rtl %} dir="rtl"{% endif %}> <head> <meta charset="UTF-8" /> - <meta name="description" content="Searx - a privacy-respecting, hackable metasearch engine" /> + <meta name="description" content="searx - a privacy-respecting, hackable metasearch engine" /> <meta name="keywords" content="searx, search, search engine, metasearch, meta search" /> <meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta name="generator" content="searx/{{ searx_version }}"> diff --git a/searx/templates/pix-art/about.html b/searx/templates/pix-art/about.html index cb4b351f8..6484b8526 100644 --- a/searx/templates/pix-art/about.html +++ b/searx/templates/pix-art/about.html @@ -5,20 +5,20 @@ <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>, aggregating the results of other <a href="{{ url_for('preferences') }}">search engines</a> while not storing information about its users. </p> - <h2>Why use Searx?</h2> + <h2>Why use searx?</h2> <ul> - <li>Searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li> - <li>Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li> - <li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li> + <li>searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li> + <li>searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li> + <li>searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li> </ul> <p>If you do care about privacy, want to be a conscious user, or otherwise believe - in digital freedom, make Searx your default search engine or run it on your own server</p> + in digital freedom, make searx your default search engine or run it on your own server</p> <h2>Technical details - How does it work?</h2> <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>, inspired by the <a href="http://seeks-project.info/">seeks project</a>.<br /> -It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, if Searx used from the search bar it performs GET requests.<br /> +It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, if searx used from the search bar it performs GET requests.<br /> Searx can be added to your browser's search bar; moreover, it can be set as the default search engine. </p> diff --git a/searx/templates/pix-art/base.html b/searx/templates/pix-art/base.html index 578180c84..6af8823cc 100644 --- a/searx/templates/pix-art/base.html +++ b/searx/templates/pix-art/base.html @@ -2,7 +2,7 @@ <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"{% if rtl %} dir="rtl"{% endif %}> <head> <meta charset="UTF-8" /> - <meta name="description" content="Searx - a privacy-respecting, hackable metasearch engine" /> + <meta name="description" content="searx - a privacy-respecting, hackable metasearch engine" /> <meta name="keywords" content="searx, search, search engine, metasearch, meta search" /> <meta name="generator" content="searx/{{ searx_version }}"> <meta name="referrer" content="no-referrer"> diff --git a/searx/templates/pix-art/index.html b/searx/templates/pix-art/index.html index d398cc829..a0c61f975 100644 --- a/searx/templates/pix-art/index.html +++ b/searx/templates/pix-art/index.html @@ -1,7 +1,7 @@ {% extends "pix-art/base.html" %} {% block content %} <div class="center"> - <div class="title"><h1><img src="{{ url_for('static', filename='img/searx-pixel.png') }}" alt="Searx Logo"/></h1></div> + <div class="title"><h1><img src="{{ url_for('static', filename='img/searx-pixel.png') }}" alt="searx Logo"/></h1></div> {% include 'pix-art/search.html' %} <p class="top_margin"> <a href="{{ url_for('about') }}" class="hmarg">{{ _('about') }}</a> diff --git a/searx/templates/pix-art/results.html b/searx/templates/pix-art/results.html index 9385b608a..f7d0e209b 100644 --- a/searx/templates/pix-art/results.html +++ b/searx/templates/pix-art/results.html @@ -8,7 +8,7 @@ {% block title %}{{ q }} - {% endblock %} {% block meta %}{% endblock %} {% block content %} -<div id="logo"><a href="./"><img src="{{ url_for('static', filename='img/searx-pixel-small.png') }}" alt="Searx Logo"/></a></div> +<div id="logo"><a href="./"><img src="{{ url_for('static', filename='img/searx-pixel-small.png') }}" alt="searx Logo"/></a></div> <div class="preferences_container right"><a href="{{ url_for('preferences') }}" id="preferences"><span>preferences</span></a></div> <div class="small search center"> {% include 'pix-art/search.html' %} diff --git a/searx/utils.py b/searx/utils.py index aa8ce92a1..744142e36 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -237,3 +237,21 @@ def list_get(a_list, index, default=None): return a_list[index] else: return default + + +def get_torrent_size(filesize, filesize_multiplier): + try: + filesize = float(filesize) + + if filesize_multiplier == 'TB': + filesize = int(filesize * 1024 * 1024 * 1024 * 1024) + elif filesize_multiplier == 'GB': + filesize = int(filesize * 1024 * 1024 * 1024) + elif filesize_multiplier == 'MB': + filesize = int(filesize * 1024 * 1024) + elif filesize_multiplier == 'KB': + filesize = int(filesize * 1024) + except: + filesize = None + + return filesize diff --git a/tests/robot/test_basic.robot b/tests/robot/test_basic.robot index 4f9b8ae05..a455eeaa0 100644 --- a/tests/robot/test_basic.robot +++ b/tests/robot/test_basic.robot @@ -16,7 +16,7 @@ Front page About page Click Element link=about - Page Should Contain Why use Searx? + Page Should Contain Why use searx? Page Should Contain Element link=search engines Preferences page diff --git a/tests/unit/engines/test_digbt.py b/tests/unit/engines/test_digbt.py new file mode 100644 index 000000000..867188ed9 --- /dev/null +++ b/tests/unit/engines/test_digbt.py @@ -0,0 +1,59 @@ +from collections import defaultdict +import mock +from searx.engines import digbt +from searx.testing import SearxTestCase + + +class TestDigBTEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = digbt.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('digbt.org', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, digbt.response, None) + self.assertRaises(AttributeError, digbt.response, []) + self.assertRaises(AttributeError, digbt.response, '') + self.assertRaises(AttributeError, digbt.response, '[]') + + response = mock.Mock(content='<html></html>') + self.assertEqual(digbt.response(response), []) + + html = """ + <table class="table"> + <tr><td class="x-item"> + <div> + <a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a> + <span class="ctime"><span style="color:red;">4 hours ago</span></span> + </div> + <div class="files"> + <ul> + <li>The Big Bang Theory 2.9 GB</li> + <li>....</li> + </ul> + </div> + <div class="tail"> + Files: 1 Size: 2.9 GB Downloads: 1 Updated: <span style="color:red;">4 hours ago</span> + + <a class="title" href="magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory"> + <span class="glyphicon glyphicon-magnet"></span> magnet-link + </a> + + </div> + </td></tr> + </table> + """ + response = mock.Mock(content=html) + results = digbt.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'The Big Bang Theory') + self.assertEqual(results[0]['url'], 'https://digbt.org/The-Big-Bang-Theory-d2.html') + self.assertEqual(results[0]['content'], 'The Big Bang Theory 2.9 GB ....') + self.assertEqual(results[0]['filesize'], 3113851289) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory') |