summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexandre Flament <alex@al-f.net> 2019-08-05 15:57:33 +0200
committer: GitHub <noreply@github.com> 2019-08-05 15:57:33 +0200
commit: 12f891da8496fad1f1fa24eeb52b1b19f03f8678 (patch)
tree: fe054e6a10efe9496419fbcd7b3d8a918be52a17
parent: 91a59ff3502803611953820e2923348440f807d0 (diff)
parent: 1cee2c1796f50b2646c8dee0fcc5e9df754c9246 (diff)
Merge pull request #1669 from dalf/engine-fixes
Engine fixes
-rw-r--r-- searx/engines/arxiv.py 2
-rw-r--r-- searx/engines/bing.py 2
-rw-r--r-- searx/engines/dictzone.py 2
-rw-r--r-- searx/engines/fdroid.py 29
-rw-r--r-- searx/settings.yml 16
-rw-r--r-- tests/unit/engines/test_arxiv.py 2
-rw-r--r-- tests/unit/engines/test_fdroid.py 55
7 files changed, 58 insertions, 50 deletions
diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py
index 5ef84f0c1..182861892 100644
--- a/searx/engines/arxiv.py
+++ b/searx/engines/arxiv.py
@@ -29,7 +29,7 @@ def request(query, params):
# basic search
offset = (params['pageno'] - 1) * number_of_results
- string_args = dict(query=query,
+ string_args = dict(query=query.decode('utf-8'),
offset=offset,
number_of_results=number_of_results)
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 742379c1a..ba22cc6b4 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -47,8 +47,6 @@ def request(query, params):
params['url'] = base_url + search_path
- params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64')
-
return params
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index 7cc44df73..09db048cc 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -15,7 +15,7 @@ from searx.utils import is_valid_lang
from searx.url_utils import urljoin
categories = ['general']
-url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py
index a6b01a8ee..4066dc716 100644
--- a/searx/engines/fdroid.py
+++ b/searx/engines/fdroid.py
@@ -18,13 +18,13 @@ categories = ['files']
paging = True
# search-url
-base_url = 'https://f-droid.org/'
-search_url = base_url + 'repository/browse/?{query}'
+base_url = 'https://search.f-droid.org/'
+search_url = base_url + '?{query}'
# do search-request
def request(query, params):
- query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
+ query = urlencode({'q': query, 'page': params['pageno'], 'lang': ''})
params['url'] = search_url.format(query=query)
return params
@@ -35,17 +35,16 @@ def response(resp):
dom = html.fromstring(resp.text)
- for app in dom.xpath('//div[@id="appheader"]'):
- url = app.xpath('./ancestor::a/@href')[0]
- title = app.xpath('./p/span/text()')[0]
- img_src = app.xpath('.//img/@src')[0]
-
- content = extract_text(app.xpath('./p')[0])
- content = content.replace(title, '', 1).strip()
-
- results.append({'url': url,
- 'title': title,
- 'content': content,
- 'img_src': img_src})
+ for app in dom.xpath('//a[@class="package-header"]'):
+ app_url = app.xpath('./@href')[0]
+ app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()'))
+ app_content = extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() \
+ + ' - ' + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip()
+ app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0]
+
+ results.append({'url': app_url,
+ 'title': app_title,
+ 'content': app_content,
+ 'img_src': app_img_src})
return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 6659c1298..cf2b13e08 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -204,11 +204,11 @@ engines:
- name : etymonline
engine : xpath
paging : True
- search_url : http://etymonline.com/?search={query}&p={pageno}
- url_xpath : //a[contains(@class, "word--")]/@href
- title_xpath : //p[contains(@class, "word__name--")]/text()
- content_xpath : //section[contains(@class, "word__defination")]/object
- first_page_num : 0
+ search_url : https://etymonline.com/search?page={pageno}&q={query}
+ url_xpath : //a[contains(@class, "word__name--")]/@href
+ title_xpath : //a[contains(@class, "word__name--")]
+ content_xpath : //section[contains(@class, "word__defination")]
+ first_page_num : 1
shortcut : et
disabled : True
@@ -703,9 +703,9 @@ engines:
shortcut: vo
categories: social media
search_url : https://searchvoat.co/?t={query}
- url_xpath : //div[@class="entry"]/p/a[@class="title"]/@href
- title_xpath : //div[@class="entry"]/p/a[@class="title"]
- content_xpath : //div[@class="entry"]/p/span[@class="domain"]
+ url_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]/@href
+ title_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]
+ content_xpath : //div[@class="entry"]/p/span[@class="domain"]/a/text()
timeout : 10.0
disabled : True
diff --git a/tests/unit/engines/test_arxiv.py b/tests/unit/engines/test_arxiv.py
index b32c0e605..83c4f8595 100644
--- a/tests/unit/engines/test_arxiv.py
+++ b/tests/unit/engines/test_arxiv.py
@@ -8,7 +8,7 @@ from searx.testing import SearxTestCase
class TestBaseEngine(SearxTestCase):
def test_request(self):
- query = 'test_query'
+ query = 'test_query'.encode('utf-8')
dicto = defaultdict(dict)
dicto['pageno'] = 1
params = arxiv.request(query, dicto)
diff --git a/tests/unit/engines/test_fdroid.py b/tests/unit/engines/test_fdroid.py
index d75f4f0b4..42a0a7148 100644
--- a/tests/unit/engines/test_fdroid.py
+++ b/tests/unit/engines/test_fdroid.py
@@ -13,29 +13,40 @@ class TestFdroidEngine(SearxTestCase):
params = fdroid.request(query, dic)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
- self.assertTrue('f-droid.org' in params['url'])
+ self.assertTrue('search.f-droid.org' in params['url'])
- def test_response(self):
+ def test_response_empty(self):
resp = mock.Mock(text='<html></html>')
self.assertEqual(fdroid.response(resp), [])
+ def test_response_oneresult(self):
html = """
- <a href="https://google.com/qwerty">
- <div id="appheader">
- <div style="float:left;padding-right:10px;">
- <img src="http://example.com/image.png"
- style="width:48px;border:none;">
- </div>
- <div style="float:right;">
- <p>Details...</p>
- </div>
- <p style="color:#000000;">
- <span style="font-size:20px;">Sample title</span>
- <br>
- Sample content
- </p>
- </div>
- </a>
+<!DOCTYPE html>
+<html>
+<head>
+ <title>test</title>
+</head>
+<body>
+ <div class="site-wrapper">
+ <div class="main-content">
+ <a class="package-header" href="https://example.com/app.url">
+ <img class="package-icon" src="https://example.com/appexample.logo.png" />
+
+ <div class="package-info">
+ <h4 class="package-name">
+ App Example 1
+ </h4>
+
+ <div class="package-desc">
+ <span class="package-summary">Description App Example 1</span>
+ <span class="package-license">GPL-3.0-only</span>
+ </div>
+ </div>
+ </a>
+ </div>
+ </div>
+</body>
+</html>
"""
resp = mock.Mock(text=html)
@@ -43,7 +54,7 @@ class TestFdroidEngine(SearxTestCase):
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
- self.assertEqual(results[0]['url'], 'https://google.com/qwerty')
- self.assertEqual(results[0]['title'], 'Sample title')
- self.assertEqual(results[0]['content'], 'Sample content')
- self.assertEqual(results[0]['img_src'], 'http://example.com/image.png')
+ self.assertEqual(results[0]['url'], 'https://example.com/app.url')
+ self.assertEqual(results[0]['title'], 'App Example 1')
+ self.assertEqual(results[0]['content'], 'Description App Example 1 - GPL-3.0-only')
+ self.assertEqual(results[0]['img_src'], 'https://example.com/appexample.logo.png')