diff options
| author | Alexandre Flament <alex@al-f.net> | 2019-08-05 15:57:33 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-08-05 15:57:33 +0200 |
| commit | 12f891da8496fad1f1fa24eeb52b1b19f03f8678 (patch) | |
| tree | fe054e6a10efe9496419fbcd7b3d8a918be52a17 | |
| parent | 91a59ff3502803611953820e2923348440f807d0 (diff) | |
| parent | 1cee2c1796f50b2646c8dee0fcc5e9df754c9246 (diff) | |
Merge pull request #1669 from dalf/engine-fixes
Engine fixes
| -rw-r--r-- | searx/engines/arxiv.py | 2 | ||||
| -rw-r--r-- | searx/engines/bing.py | 2 | ||||
| -rw-r--r-- | searx/engines/dictzone.py | 2 | ||||
| -rw-r--r-- | searx/engines/fdroid.py | 29 | ||||
| -rw-r--r-- | searx/settings.yml | 16 | ||||
| -rw-r--r-- | tests/unit/engines/test_arxiv.py | 2 | ||||
| -rw-r--r-- | tests/unit/engines/test_fdroid.py | 55 |
7 files changed, 58 insertions, 50 deletions
```diff
diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py
index 5ef84f0c1..182861892 100644
--- a/searx/engines/arxiv.py
+++ b/searx/engines/arxiv.py
@@ -29,7 +29,7 @@ def request(query, params):
     # basic search
     offset = (params['pageno'] - 1) * number_of_results
 
-    string_args = dict(query=query,
+    string_args = dict(query=query.decode('utf-8'),
                        offset=offset,
                        number_of_results=number_of_results)
 
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 742379c1a..ba22cc6b4 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -47,8 +47,6 @@ def request(query, params):
 
     params['url'] = base_url + search_path
 
-    params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64')
-
     return params
 
 
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index 7cc44df73..09db048cc 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -15,7 +15,7 @@ from searx.utils import is_valid_lang
 from searx.url_utils import urljoin
 
 categories = ['general']
-url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py
index a6b01a8ee..4066dc716 100644
--- a/searx/engines/fdroid.py
+++ b/searx/engines/fdroid.py
@@ -18,13 +18,13 @@ categories = ['files']
 paging = True
 
 # search-url
-base_url = 'https://f-droid.org/'
-search_url = base_url + 'repository/browse/?{query}'
+base_url = 'https://search.f-droid.org/'
+search_url = base_url + '?{query}'
 
 
 # do search-request
 def request(query, params):
-    query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
+    query = urlencode({'q': query, 'page': params['pageno'], 'lang': ''})
     params['url'] = search_url.format(query=query)
     return params
 
@@ -35,17 +35,16 @@ def response(resp):
 
     dom = html.fromstring(resp.text)
 
-    for app in dom.xpath('//div[@id="appheader"]'):
-        url = app.xpath('./ancestor::a/@href')[0]
-        title = app.xpath('./p/span/text()')[0]
-        img_src = app.xpath('.//img/@src')[0]
-
-        content = extract_text(app.xpath('./p')[0])
-        content = content.replace(title, '', 1).strip()
-
-        results.append({'url': url,
-                        'title': title,
-                        'content': content,
-                        'img_src': img_src})
+    for app in dom.xpath('//a[@class="package-header"]'):
+        app_url = app.xpath('./@href')[0]
+        app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()'))
+        app_content = extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() \
+            + ' - ' + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip()
+        app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0]
+
+        results.append({'url': app_url,
+                        'title': app_title,
+                        'content': app_content,
+                        'img_src': app_img_src})
 
     return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 6659c1298..cf2b13e08 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -204,11 +204,11 @@ engines:
   - name : etymonline
     engine : xpath
     paging : True
-    search_url : http://etymonline.com/?search={query}&p={pageno}
-    url_xpath : //a[contains(@class, "word--")]/@href
-    title_xpath : //p[contains(@class, "word__name--")]/text()
-    content_xpath : //section[contains(@class, "word__defination")]/object
-    first_page_num : 0
+    search_url : https://etymonline.com/search?page={pageno}&q={query}
+    url_xpath : //a[contains(@class, "word__name--")]/@href
+    title_xpath : //a[contains(@class, "word__name--")]
+    content_xpath : //section[contains(@class, "word__defination")]
+    first_page_num : 1
     shortcut : et
     disabled : True
 
@@ -703,9 +703,9 @@ engines:
     shortcut: vo
     categories: social media
     search_url : https://searchvoat.co/?t={query}
-    url_xpath : //div[@class="entry"]/p/a[@class="title"]/@href
-    title_xpath : //div[@class="entry"]/p/a[@class="title"]
-    content_xpath : //div[@class="entry"]/p/span[@class="domain"]
+    url_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]/@href
+    title_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]
+    content_xpath : //div[@class="entry"]/p/span[@class="domain"]/a/text()
     timeout : 10.0
     disabled : True
 
diff --git a/tests/unit/engines/test_arxiv.py b/tests/unit/engines/test_arxiv.py
index b32c0e605..83c4f8595 100644
--- a/tests/unit/engines/test_arxiv.py
+++ b/tests/unit/engines/test_arxiv.py
@@ -8,7 +8,7 @@ from searx.testing import SearxTestCase
 class TestBaseEngine(SearxTestCase):
 
     def test_request(self):
-        query = 'test_query'
+        query = 'test_query'.encode('utf-8')
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
         params = arxiv.request(query, dicto)
diff --git a/tests/unit/engines/test_fdroid.py b/tests/unit/engines/test_fdroid.py
index d75f4f0b4..42a0a7148 100644
--- a/tests/unit/engines/test_fdroid.py
+++ b/tests/unit/engines/test_fdroid.py
@@ -13,29 +13,40 @@ class TestFdroidEngine(SearxTestCase):
         params = fdroid.request(query, dic)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
-        self.assertTrue('f-droid.org' in params['url'])
+        self.assertTrue('search.f-droid.org' in params['url'])
 
-    def test_response(self):
+    def test_response_empty(self):
         resp = mock.Mock(text='<html></html>')
         self.assertEqual(fdroid.response(resp), [])
 
+    def test_response_oneresult(self):
         html = """
-        <a href="https://google.com/qwerty">
-            <div id="appheader">
-                <div style="float:left;padding-right:10px;">
-                    <img src="http://example.com/image.png"
-                         style="width:48px;border:none;">
-                </div>
-                <div style="float:right;">
-                    <p>Details...</p>
-                </div>
-                <p style="color:#000000;">
-                    <span style="font-size:20px;">Sample title</span>
-                    <br>
-                    Sample content
-                </p>
-            </div>
-        </a>
+<!DOCTYPE html>
+<html>
+<head>
+  <title>test</title>
+</head>
+<body>
+  <div class="site-wrapper">
+    <div class="main-content">
+      <a class="package-header" href="https://example.com/app.url">
+        <img class="package-icon" src="https://example.com/appexample.logo.png" />
+
+        <div class="package-info">
+          <h4 class="package-name">
+            App Example 1
+          </h4>
+
+          <div class="package-desc">
+            <span class="package-summary">Description App Example 1</span>
+            <span class="package-license">GPL-3.0-only</span>
+          </div>
+        </div>
+      </a>
+    </div>
+  </div>
+</body>
+</html>
         """
 
         resp = mock.Mock(text=html)
@@ -43,7 +54,7 @@ class TestFdroidEngine(SearxTestCase):
 
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['url'], 'https://google.com/qwerty')
-        self.assertEqual(results[0]['title'], 'Sample title')
-        self.assertEqual(results[0]['content'], 'Sample content')
-        self.assertEqual(results[0]['img_src'], 'http://example.com/image.png')
+        self.assertEqual(results[0]['url'], 'https://example.com/app.url')
+        self.assertEqual(results[0]['title'], 'App Example 1')
+        self.assertEqual(results[0]['content'], 'Description App Example 1 - GPL-3.0-only')
+        self.assertEqual(results[0]['img_src'], 'https://example.com/appexample.logo.png')
```