From a11948c71bfe7b2aac6e50e7634874d5073c7d84 Mon Sep 17 00:00:00 2001 From: marc Date: Sat, 29 Oct 2016 21:04:01 -0500 Subject: Add language support for more engines. --- tests/unit/engines/test_duckduckgo.py | 2 +- tests/unit/engines/test_duckduckgo_definitions.py | 4 ++++ tests/unit/engines/test_google.py | 2 +- tests/unit/engines/test_qwant.py | 2 +- tests/unit/engines/test_swisscows.py | 2 +- tests/unit/engines/test_wikipedia.py | 2 +- 6 files changed, 9 insertions(+), 5 deletions(-) (limited to 'tests/unit') diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py index 734f2c39e..b5a4fd4f0 100644 --- a/tests/unit/engines/test_duckduckgo.py +++ b/tests/unit/engines/test_duckduckgo.py @@ -11,7 +11,7 @@ class TestDuckduckgoEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 - dicto['language'] = 'de_CH' + dicto['language'] = 'de-CH' dicto['time_range'] = '' params = duckduckgo.request(query, dicto) self.assertIn('url', params) diff --git a/tests/unit/engines/test_duckduckgo_definitions.py b/tests/unit/engines/test_duckduckgo_definitions.py index 39da64175..feafe47ba 100644 --- a/tests/unit/engines/test_duckduckgo_definitions.py +++ b/tests/unit/engines/test_duckduckgo_definitions.py @@ -21,10 +21,14 @@ class TestDDGDefinitionsEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 + dicto['language'] = 'es' params = duckduckgo_definitions.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) self.assertIn('duckduckgo.com', params['url']) + self.assertIn('headers', params) + self.assertIn('Accept-Language', params['headers']) + self.assertIn('es', params['headers']['Accept-Language']) def test_response(self): self.assertRaises(AttributeError, duckduckgo_definitions.response, None) diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py index 8e73e2ab7..c83eb3bf0 100644 --- a/tests/unit/engines/test_google.py +++ b/tests/unit/engines/test_google.py @@ -18,7 +18,7 @@ class TestGoogleEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' dicto['time_range'] = '' params = google.request(query, dicto) self.assertIn('url', params) diff --git a/tests/unit/engines/test_qwant.py b/tests/unit/engines/test_qwant.py index 7d79d13d8..c4c0b0690 100644 --- a/tests/unit/engines/test_qwant.py +++ b/tests/unit/engines/test_qwant.py @@ -10,7 +10,7 @@ class TestQwantEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' qwant.categories = [''] params = qwant.request(query, dicto) self.assertIn('url', params) diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py index 3b4ce7b0f..dbbc044da 100644 --- a/tests/unit/engines/test_swisscows.py +++ b/tests/unit/engines/test_swisscows.py @@ -10,7 +10,7 @@ class TestSwisscowsEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 - dicto['language'] = 'de_DE' + dicto['language'] = 'de-DE' params = swisscows.request(query, dicto) self.assertTrue('url' in params) self.assertTrue(query in params['url']) diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index d1c44036d..431cf69c7 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -10,7 +10,7 @@ class TestWikipediaEngine(SearxTestCase): def test_request(self): query = 'test_query' dicto = defaultdict(dict) - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' params = wikipedia.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) -- cgit v1.2.3 From f62ce21f50b540315a708ebfbf36878ddec9d1c4 Mon Sep 17 00:00:00 2001 From: marc Date: Sat, 5 Nov 2016 20:51:38 -0600 Subject: [mod] fetch supported languages for several engines utils/fetch_languages.py gets languages supported by each engine and generates engines_languages.json with each engine's supported language. --- tests/unit/engines/test_subtitleseeker.py | 2 +- tests/unit/engines/test_wikipedia.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tests/unit') diff --git a/tests/unit/engines/test_subtitleseeker.py b/tests/unit/engines/test_subtitleseeker.py index a641601b2..e499cd2d6 100644 --- a/tests/unit/engines/test_subtitleseeker.py +++ b/tests/unit/engines/test_subtitleseeker.py @@ -17,7 +17,7 @@ class TestSubtitleseekerEngine(SearxTestCase): def test_response(self): dicto = defaultdict(dict) - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' response = mock.Mock(search_params=dicto) self.assertRaises(AttributeError, subtitleseeker.response, None) diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index 431cf69c7..3e2f47ea9 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -8,6 +8,8 @@ from searx.testing import SearxTestCase class TestWikipediaEngine(SearxTestCase): def test_request(self): + wikipedia.supported_languages = ['fr', 'en'] + query = 'test_query' dicto = defaultdict(dict) dicto['language'] = 'fr-FR' -- cgit v1.2.3 From e0c270bd72f7b2a40222e3ed264e25d36cb0fc30 Mon Sep 17 00:00:00 2001 From: marc Date: Tue, 13 Dec 2016 23:51:15 -0600 Subject: tests for language support in engines --- tests/unit/engines/test_duckduckgo.py | 11 +++++++++++ tests/unit/engines/test_gigablast.py | 6 ++++++ tests/unit/engines/test_subtitleseeker.py | 5 +++++ tests/unit/engines/test_wikipedia.py | 4 ++++ tests/unit/test_preferences.py | 23 ++++++++++++++++++++++- 5 files changed, 48 insertions(+), 1 deletion(-) (limited to 'tests/unit') diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py index b5a4fd4f0..2aeaa1880 100644 --- a/tests/unit/engines/test_duckduckgo.py +++ b/tests/unit/engines/test_duckduckgo.py @@ -19,6 +19,17 @@ class TestDuckduckgoEngine(SearxTestCase): self.assertIn('duckduckgo.com', params['url']) self.assertIn('ch-de', params['url']) + # when ddg uses non standard code + dicto['language'] = 'en-GB' + params = duckduckgo.request(query, dicto) + self.assertIn('uk-en', params['url']) + + # no country given + duckduckgo.supported_languages = ['de-CH', 'en-US'] + dicto['language'] = 'de' + params = duckduckgo.request(query, dicto) + self.assertIn('ch-de', params['url']) + def test_no_url_in_request_year_time_range(self): dicto = defaultdict(dict) query = 'test_query' diff --git a/tests/unit/engines/test_gigablast.py b/tests/unit/engines/test_gigablast.py index cb96f3cd1..0723b064c 100644 --- a/tests/unit/engines/test_gigablast.py +++ b/tests/unit/engines/test_gigablast.py @@ -15,6 +15,12 @@ class TestGigablastEngine(SearxTestCase): self.assertTrue('url' in params) self.assertTrue(query in params['url']) self.assertTrue('gigablast.com' in params['url']) + self.assertTrue('xx' in params['url']) + + dicto['language'] = 'en-US' + params = gigablast.request(query, dicto) + self.assertTrue('en' in params['url']) + self.assertFalse('en-US' in params['url']) def test_response(self): self.assertRaises(AttributeError, gigablast.response, None) diff --git a/tests/unit/engines/test_subtitleseeker.py b/tests/unit/engines/test_subtitleseeker.py index e499cd2d6..a22ee74b9 100644 --- a/tests/unit/engines/test_subtitleseeker.py +++ b/tests/unit/engines/test_subtitleseeker.py @@ -10,6 +10,7 @@ class TestSubtitleseekerEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 + dicto['language'] = 'fr-FR' params = subtitleseeker.request(query, dicto) self.assertTrue('url' in params) self.assertTrue(query in params['url']) @@ -68,6 +69,10 @@ class TestSubtitleseekerEngine(SearxTestCase): self.assertIn('1039 Subs', results[0]['content']) self.assertIn('Alternative Title', results[0]['content']) + dicto['language'] = 'pt-BR' + results = subtitleseeker.response(response) + self.assertEqual(results[0]['url'], 'http://this.is.the.url/Brazilian/') + html = """
diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index 3e2f47ea9..0057277c5 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -29,6 +29,10 @@ class TestWikipediaEngine(SearxTestCase): params = wikipedia.request(query, dicto) self.assertIn('en', params['url']) + dicto['language'] = 'xx' + params = wikipedia.request(query, dicto) + self.assertIn('en', params['url']) + def test_response(self): dicto = defaultdict(dict) dicto['language'] = 'fr' diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py index c17350809..885c515e2 100644 --- a/tests/unit/test_preferences.py +++ b/tests/unit/test_preferences.py @@ -1,4 +1,4 @@ -from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, +from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting, MultipleChoiceSetting, PluginsSetting, ValidationException) from searx.testing import SearxTestCase @@ -88,6 +88,27 @@ class TestSettings(SearxTestCase): setting.parse('2') self.assertEquals(setting.get_value(), ['2']) + # search language settings + def test_lang_setting_valid_choice(self): + setting = SearchLanguageSetting('all', choices=['all', 'de', 'en']) + setting.parse('de') + self.assertEquals(setting.get_value(), 'de') + + def test_lang_setting_invalid_choice(self): + setting = SearchLanguageSetting('all', choices=['all', 'de', 'en']) + setting.parse('xx') + self.assertEquals(setting.get_value(), 'all') + + def test_lang_setting_old_cookie_choice(self): + setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES']) + setting.parse('es_XA') + self.assertEquals(setting.get_value(), 'es') + + def test_lang_setting_old_cookie_format(self): + setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES']) + setting.parse('es_ES') + self.assertEquals(setting.get_value(), 'es-ES') + # plugins settings def test_plugins_setting_all_default_enabled(self): plugin1 = PluginStub('plugin1', True) -- cgit v1.2.3 From af35eee10b98940c51c6e5e18629de514b4bd48d Mon Sep 17 00:00:00 2001 From: marc Date: Thu, 15 Dec 2016 00:34:43 -0600 Subject: tests for _fetch_supported_languages in engines and refactor method to make it testable without making requests --- tests/unit/engines/test_bing.py | 32 ++++++++++++ tests/unit/engines/test_dailymotion.py | 37 ++++++++++++++ tests/unit/engines/test_duckduckgo.py | 14 +++++ tests/unit/engines/test_gigablast.py | 25 +++++++++ tests/unit/engines/test_google.py | 57 +++++++++++++++++++++ tests/unit/engines/test_swisscows.py | 27 ++++++++++ tests/unit/engines/test_wikipedia.py | 93 ++++++++++++++++++++++++++++++++++ tests/unit/engines/test_yahoo.py | 30 +++++++++++ 8 files changed, 315 insertions(+) (limited to 'tests/unit') diff --git a/tests/unit/engines/test_bing.py b/tests/unit/engines/test_bing.py index 886584229..61f8629d0 100644 --- a/tests/unit/engines/test_bing.py +++ b/tests/unit/engines/test_bing.py @@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase): self.assertEqual(results[0]['title'], 'This should be the title') self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') self.assertEqual(results[0]['content'], 'This should be the content.') + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + results = bing._fetch_supported_languages(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + html = """ + + +
+
+
+
+
+
+
+
+
+
+
+ + + """ + response = mock.Mock(text=html) + languages = bing._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 3) + self.assertIn('es', languages) + self.assertIn('pt-BR', languages) + self.assertIn('pt-PT', languages) diff --git a/tests/unit/engines/test_dailymotion.py b/tests/unit/engines/test_dailymotion.py index 368b3a7a5..72071af66 100644 --- a/tests/unit/engines/test_dailymotion.py +++ b/tests/unit/engines/test_dailymotion.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from collections import defaultdict import mock from searx.engines import dailymotion @@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase): results = dailymotion.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) + + def test_fetch_supported_languages(self): + json = r""" + {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans", + "localized_name":"Afrikaans","display_name":"Afrikaans"}, + {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629", + "localized_name":"Arabic","display_name":"Arabic"}, + {"code":"la","name":"Latin","native_name":null, + "localized_name":"Latin","display_name":"Latin"} + ]} + """ + response = mock.Mock(text=json) + languages = dailymotion._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 3) + self.assertIn('af', languages) + self.assertIn('ar', languages) + self.assertIn('la', languages) + + self.assertEqual(type(languages['af']), dict) + self.assertEqual(type(languages['ar']), dict) + self.assertEqual(type(languages['la']), dict) + + self.assertIn('name', languages['af']) + self.assertIn('name', languages['ar']) + self.assertNotIn('name', languages['la']) + + self.assertIn('english_name', languages['af']) + self.assertIn('english_name', languages['ar']) + self.assertIn('english_name', languages['la']) + + self.assertEqual(languages['af']['name'], 'Afrikaans') + self.assertEqual(languages['af']['english_name'], 'Afrikaans') + self.assertEqual(languages['ar']['name'], u'العربية') + self.assertEqual(languages['ar']['english_name'], 'Arabic') + self.assertEqual(languages['la']['english_name'], 'Latin') diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py index 2aeaa1880..7d6abad22 100644 --- a/tests/unit/engines/test_duckduckgo.py +++ b/tests/unit/engines/test_duckduckgo.py @@ -84,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase): self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű') self.assertEqual(results[0]['content'], 'This should be the content.') + + def test_fetch_supported_languages(self): + js = """some code...regions:{ + "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)" + }some more code...""" + response = mock.Mock(text=js) + languages = duckduckgo._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 5) + self.assertIn('wt-WT', languages) + self.assertIn('es-AR', languages) + self.assertIn('en-AU', languages) + self.assertIn('de-AT', languages) + self.assertIn('fr-BE', languages) diff --git a/tests/unit/engines/test_gigablast.py b/tests/unit/engines/test_gigablast.py index 0723b064c..74bf89333 100644 --- a/tests/unit/engines/test_gigablast.py +++ b/tests/unit/engines/test_gigablast.py @@ -89,3 +89,28 @@ class TestGigablastEngine(SearxTestCase): self.assertEqual(results[0]['title'], 'South by Southwest 2016') self.assertEqual(results[0]['url'], 'www.sxsw.com') self.assertEqual(results[0]['content'], 'This should be the content.') + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + results = gigablast._fetch_supported_languages(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + html = """ + + + + + + + + + + """ + response = mock.Mock(text=html) + languages = gigablast._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 2) + self.assertIn('en', languages) + self.assertIn('fr', languages) diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py index c83eb3bf0..0d56b1e7b 100644 --- a/tests/unit/engines/test_google.py +++ b/tests/unit/engines/test_google.py @@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase): self.assertEqual(results[0]['title'], '') self.assertEqual(results[0]['content'], '') self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg') + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + languages = google._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 0) + + html = u""" + + + + + + + + + +
+ + + + + + + + +
+ + + """ + response = mock.Mock(text=html) + languages = google._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 3) + + self.assertIn('en', languages) + self.assertIn('zh-CN', languages) + self.assertIn('zh-TW', languages) + + self.assertEquals(type(languages['en']), dict) + self.assertEquals(type(languages['zh-CN']), dict) + self.assertEquals(type(languages['zh-TW']), dict) + + self.assertIn('name', languages['en']) + self.assertIn('name', languages['zh-CN']) + self.assertIn('name', languages['zh-TW']) + + self.assertEquals(languages['en']['name'], 'English') + self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)') + self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)') diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py index dbbc044da..27f33d70a 100644 --- a/tests/unit/engines/test_swisscows.py +++ b/tests/unit/engines/test_swisscows.py @@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase): self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg') self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png') self.assertEqual(results[2]['template'], 'images.html') + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + languages = swisscows._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 0) + + html = """ + +
+
+
    +
  • +
  • +
  • +
+
+
+ + """ + response = mock.Mock(text=html) + languages = swisscows._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 3) + self.assertIn('de-CH', languages) + self.assertIn('fr-CH', languages) diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index 0057277c5..988080b6a 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -164,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase): self.assertEqual(len(results), 2) self.assertEqual(results[1]['infobox'], u'披頭四樂隊') self.assertIn(u'披头士乐队...', results[1]['content']) + + def test_fetch_supported_languages(self): + html = u"""""" + response = mock.Mock(text=html) + languages = wikipedia._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 0) + + html = u""" + + +
+
+

Table header

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
NLanguageLanguage (local)WikiArticles
2SwedishSvenskasv3000000
3CebuanoSinugboanong Binisayaceb3000000
+

Table header

+ + + + + + + + + + + + + + + + + + + +
NLanguageLanguage (local)WikiArticles
2Norwegian (Bokmål)Norsk (Bokmål)no100000
+
+
+ + + """ + response = mock.Mock(text=html) + languages = wikipedia._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 3) + + self.assertIn('sv', languages) + self.assertIn('ceb', languages) + self.assertIn('no', languages) + + self.assertEqual(type(languages['sv']), dict) + self.assertEqual(type(languages['ceb']), dict) + self.assertEqual(type(languages['no']), dict) + + self.assertIn('name', languages['sv']) + self.assertIn('english_name', languages['sv']) + self.assertIn('articles', languages['sv']) + + self.assertEqual(languages['sv']['name'], 'Svenska') + self.assertEqual(languages['sv']['english_name'], 'Swedish') + self.assertEqual(languages['sv']['articles'], 3000000) + self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya') + self.assertEqual(languages['ceb']['english_name'], 'Cebuano') + self.assertEqual(languages['ceb']['articles'], 3000000) + self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)') + self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)') + self.assertEqual(languages['no']['articles'], 100000) diff --git a/tests/unit/engines/test_yahoo.py b/tests/unit/engines/test_yahoo.py index 303295e2f..82c4d99bb 100644 --- a/tests/unit/engines/test_yahoo.py +++ b/tests/unit/engines/test_yahoo.py @@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase): results = yahoo.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + results = yahoo._fetch_supported_languages(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + html = """ + +
+
+ + + + + + + +
+
+ + """ + response = mock.Mock(text=html) + languages = yahoo._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 3) + self.assertIn('ar', languages) + self.assertIn('zh-chs', languages) + self.assertIn('zh-cht', languages) -- cgit v1.2.3