summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--searx/engines/duckduckgo_definitions.py19
-rw-r--r--searx/engines/framalibre.py5
-rw-r--r--searx/engines/google.py5
-rw-r--r--searx/engines/google_images.py16
-rw-r--r--searx/engines/seedpeer.py78
-rw-r--r--searx/engines/soundcloud.py4
-rw-r--r--searx/settings.yml7
-rw-r--r--searx/templates/courgette/result_templates/torrent.html2
-rw-r--r--searx/templates/legacy/result_templates/torrent.html2
-rw-r--r--searx/templates/oscar/result_templates/torrent.html2
-rw-r--r--searx/templates/simple/result_templates/torrent.html2
-rw-r--r--searx/webapp.py5
-rw-r--r--tests/unit/engines/test_seedpeer.py66
13 files changed, 197 insertions, 16 deletions
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 957a13ea6..79d10c303 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -1,3 +1,14 @@
+"""
+DuckDuckGo (definitions)
+
+- `Instant Answer API`_
+- `DuckDuckGo query`_
+
+.. _Instant Answer API: https://duckduckgo.com/api
+.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1
+
+"""
+
import json
from lxml import html
from re import compile
@@ -25,7 +36,8 @@ def result_to_text(url, text, htmlResult):
def request(query, params):
params['url'] = url.format(query=urlencode({'q': query}))
language = match_language(params['language'], supported_languages, language_aliases)
- params['headers']['Accept-Language'] = language.split('-')[0]
+ language = language.split('-')[0]
+ params['headers']['Accept-Language'] = language
return params
@@ -43,8 +55,9 @@ def response(resp):
# add answer if there is one
answer = search_res.get('Answer', '')
- if answer != '':
- results.append({'answer': html_to_text(answer)})
+ if answer:
+ if search_res.get('AnswerType', '') not in ['calc']:
+ results.append({'answer': html_to_text(answer)})
# add infobox
if 'Definition' in search_res:
diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py
index 146cdaeec..f3441fa5f 100644
--- a/searx/engines/framalibre.py
+++ b/searx/engines/framalibre.py
@@ -10,7 +10,10 @@
@parse url, title, content, thumbnail, img_src
"""
-from cgi import escape
+try:
+ from cgi import escape
+except:
+ from html import escape
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urljoin, urlencode
diff --git a/searx/engines/google.py b/searx/engines/google.py
index a82b1f1f7..19bde710d 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -199,8 +199,9 @@ def request(query, params):
params['headers']['Accept-Language'] = language + ',' + language + '-' + country
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
- # Force Internet Explorer 12 user agent to avoid loading the new UI that Searx can't parse
- params['headers']['User-Agent'] = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
+ # Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse
+ params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)"
+ "AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1")
params['google_hostname'] = google_hostname
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index d9a49e9cc..636913114 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -70,11 +70,21 @@ def response(resp):
try:
metadata = loads(result)
- img_format = "{0} {1}x{2}".format(metadata['ity'], str(metadata['ow']), str(metadata['oh']))
- source = "{0} ({1})".format(metadata['st'], metadata['isu'])
+
+ img_format = metadata.get('ity', '')
+ img_width = metadata.get('ow', '')
+ img_height = metadata.get('oh', '')
+ if img_width and img_height:
+ img_format += " {0}x{1}".format(img_width, img_height)
+
+ source = metadata.get('st', '')
+ source_url = metadata.get('isu', '')
+ if source_url:
+ source += " ({0})".format(source_url)
+
results.append({'url': metadata['ru'],
'title': metadata['pt'],
- 'content': metadata['s'],
+ 'content': metadata.get('s', ''),
'source': source,
'img_format': img_format,
'thumbnail_src': metadata['tu'],
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py
new file mode 100644
index 000000000..f9b1f99c8
--- /dev/null
+++ b/searx/engines/seedpeer.py
@@ -0,0 +1,78 @@
+# Seedpeer (Videos, Music, Files)
+#
+# @website https://seedpeer.me
+# @provide-api no (nothing found)
+#
+# @using-api no
+# @results HTML (using search portal)
+# @stable yes (HTML can change)
+# @parse url, title, content, seed, leech, magnetlink
+
+from lxml import html
+from json import loads
+from operator import itemgetter
+from searx.url_utils import quote, urljoin
+from searx.engines.xpath import extract_text
+
+
+url = 'https://seedpeer.me/'
+search_url = url + 'search/{search_term}?page={page_no}'
+torrent_file_url = url + 'torrent/{torrent_hash}'
+
+# specific xpath variables
+script_xpath = '//script[@type="text/javascript"][not(@src)]'
+torrent_xpath = '(//table)[2]/tbody/tr'
+link_xpath = '(./td)[1]/a/@href'
+age_xpath = '(./td)[2]'
+size_xpath = '(./td)[3]'
+
+
+# do search-request
+def request(query, params):
+ params['url'] = search_url.format(search_term=quote(query),
+ page_no=params['pageno'])
+ return params
+
+
+# get response from search-request
+def response(resp):
+ results = []
+ dom = html.fromstring(resp.text)
+ result_rows = dom.xpath(torrent_xpath)
+
+ try:
+ script_element = dom.xpath(script_xpath)[0]
+ json_string = script_element.text[script_element.text.find('{'):]
+ torrents_json = loads(json_string)
+ except:
+ return []
+
+ # parse results
+ for torrent_row, torrent_json in zip(result_rows, torrents_json['data']['list']):
+ title = torrent_json['name']
+ seed = int(torrent_json['seeds'])
+ leech = int(torrent_json['peers'])
+ size = int(torrent_json['size'])
+ torrent_hash = torrent_json['hash']
+
+ torrentfile = torrent_file_url.format(torrent_hash=torrent_hash)
+ magnetlink = 'magnet:?xt=urn:btih:{}'.format(torrent_hash)
+
+ age = extract_text(torrent_row.xpath(age_xpath))
+ link = torrent_row.xpath(link_xpath)[0]
+
+ href = urljoin(url, link)
+
+ # append result
+ results.append({'url': href,
+ 'title': title,
+ 'content': age,
+ 'seed': seed,
+ 'leech': leech,
+ 'filesize': size,
+ 'torrentfile': torrentfile,
+ 'magnetlink': magnetlink,
+ 'template': 'torrent.html'})
+
+ # return results sorted by seeder
+ return sorted(results, key=itemgetter('seed'), reverse=True)
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index 870998545..284689bf6 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -51,7 +51,9 @@ def get_client_id():
if response.ok:
tree = html.fromstring(response.content)
- script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
+ # script_tags has been moved from /assets/app/ to /assets/ path. I
+ # found client_id in https://a-v2.sndcdn.com/assets/49-a0c01933-3.js
+ script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
# extracts valid app_js urls from soundcloud.com content
diff --git a/searx/settings.yml b/searx/settings.yml
index 835fbe5f6..c6f805331 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -744,10 +744,15 @@ engines:
title_xpath : ./h2
content_xpath : ./p[@class="s"]
suggestion_xpath : /html/body//div[@class="top-info"]/p[@class="top-info spell"]/a
- first_page_num : 1
+ first_page_num : 0
page_size : 10
disabled : True
+ - name : seedpeer
+ shortcut : speu
+ engine : seedpeer
+ categories: files, music, videos
+
# - name : yacy
# engine : yacy
# shortcut : ya
diff --git a/searx/templates/courgette/result_templates/torrent.html b/searx/templates/courgette/result_templates/torrent.html
index d659064d9..7f94a221e 100644
--- a/searx/templates/courgette/result_templates/torrent.html
+++ b/searx/templates/courgette/result_templates/torrent.html
@@ -4,7 +4,7 @@
{% endif %}
<h3 class="result_title"><a href="{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ result.title|safe }}</a></h3>
{% if result.content %}<span class="content">{{ result.content|safe }}</span><br />{% endif %}
- {% if result.seed %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span><br />{% endif %}
+ {% if result.seed is defined %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span><br />{% endif %}
<span>
{% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %}
{% if result.torrentfile %}<a href="{{ result.torrentfile }}" class="torrentfile" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('torrent file') }}</a>{% endif %}
diff --git a/searx/templates/legacy/result_templates/torrent.html b/searx/templates/legacy/result_templates/torrent.html
index 7a8ac33de..068e05373 100644
--- a/searx/templates/legacy/result_templates/torrent.html
+++ b/searx/templates/legacy/result_templates/torrent.html
@@ -8,6 +8,6 @@
<p>
{% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %}
{% if result.torrentfile %}<a href="{{ result.torrentfile }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %} class="torrentfile">{{ _('torrent file') }}</a>{% endif %} -
- {% if result.seed %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span>{% endif %}
+ {% if result.seed is defined %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span>{% endif %}
</p>
</div>
diff --git a/searx/templates/oscar/result_templates/torrent.html b/searx/templates/oscar/result_templates/torrent.html
index f5ea415e2..089367e36 100644
--- a/searx/templates/oscar/result_templates/torrent.html
+++ b/searx/templates/oscar/result_templates/torrent.html
@@ -3,7 +3,7 @@
{{ result_header(result, favicons) }}
{{ result_sub_header(result) }}
-{% if result.seed %}<p class="result-content">{{ icon('transfer') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span>{% endif %}
+{% if result.seed is defined %}<p class="result-content">{{ icon('transfer') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span>{% endif %}
{% if result.filesize %}<br />{{ icon('floppy-disk') }} {{ _('Filesize') }}
<span class="badge">
{% if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }}
diff --git a/searx/templates/simple/result_templates/torrent.html b/searx/templates/simple/result_templates/torrent.html
index 3c7fd15e8..71c775bc9 100644
--- a/searx/templates/simple/result_templates/torrent.html
+++ b/searx/templates/simple/result_templates/torrent.html
@@ -6,7 +6,7 @@
{% if result.magnetlink %}<p class="altlink"> &bull; {{ result_link(result.magnetlink, icon('magnet') + _('magnet link'), "magnetlink") }}</p>{% endif %}
{% if result.torrentfile %}<p class="altlink"> &bull; {{ result_link(result.torrentfile, icon('download-alt') + _('torrent file'), "torrentfile") }}</p>{% endif %}
-{% if result.seed %}<p class="stat"> &bull; {{ icon('arrow-swap') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span></p>{% endif %}
+{% if result.seed is defined %}<p class="stat"> &bull; {{ icon('arrow-swap') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span></p>{% endif %}
{%- if result.filesize %}<p class="stat">{{ icon('floppy-disk') }} {{ _('Filesize') }}<span class="badge">
{%- if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }}
diff --git a/searx/webapp.py b/searx/webapp.py
index 00895d96c..7cf4106d3 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -41,7 +41,10 @@ except:
logger.critical("cannot import dependency: pygments")
from sys import exit
exit(1)
-from cgi import escape
+try:
+ from cgi import escape
+except:
+ from html import escape
from datetime import datetime, timedelta
from time import time
from werkzeug.contrib.fixers import ProxyFix
diff --git a/tests/unit/engines/test_seedpeer.py b/tests/unit/engines/test_seedpeer.py
new file mode 100644
index 000000000..2057c1cb1
--- /dev/null
+++ b/tests/unit/engines/test_seedpeer.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import seedpeer
+from searx.testing import SearxTestCase
+
+
+class TestBtdiggEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 1
+ params = seedpeer.request(query, dicto)
+ self.assertIn('url', params)
+ self.assertIn(query, params['url'])
+ self.assertIn('seedpeer', params['url'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, seedpeer.response, None)
+ self.assertRaises(AttributeError, seedpeer.response, [])
+ self.assertRaises(AttributeError, seedpeer.response, '')
+ self.assertRaises(AttributeError, seedpeer.response, '[]')
+
+ response = mock.Mock(text='<html></html>')
+ self.assertEqual(seedpeer.response(response), [])
+
+ html = u"""
+ <html>
+ <head>
+ <script></script>
+ <script type="text/javascript" src="not_here.js"></script>
+ <script type="text/javascript">
+ window.initialData=
+ {"data": {"list": [{"name": "Title", "seeds": "10", "peers": "20", "size": "1024", "hash": "abc123"}]}}
+ </script>
+ </head>
+ <body>
+ <table></table>
+ <table>
+ <thead><tr></tr></thead>
+ <tbody>
+ <tr>
+ <td><a href="link">Title</a></td>
+ <td>1 year</td>
+ <td>1 KB</td>
+ <td>10</td>
+ <td>20</td>
+ <td></td>
+ </tr>
+ </tbody>
+ </table>
+ </body>
+ </html>
+ """
+ response = mock.Mock(text=html)
+ results = seedpeer.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'Title')
+ self.assertEqual(results[0]['url'], 'https://seedpeer.me/link')
+ self.assertEqual(results[0]['seed'], 10)
+ self.assertEqual(results[0]['leech'], 20)
+ self.assertEqual(results[0]['filesize'], 1024)
+ self.assertEqual(results[0]['torrentfile'], 'https://seedpeer.me/torrent/abc123')
+ self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:abc123')