"""
 MyMemory Translated

 @website     https://mymemory.translated.net/
 @provide-api yes (https://mymemory.translated.net/doc/spec.php)
 @using-api   yes
 @results     JSON
 @stable      yes
 @parse       url, title, content
"""
# State of searx/engines/translated.py after this patch series
# (author: potato, 2016-09-06):
#   ab471fd  [enh] mymemory translated engine added for multi-word translations
#   c051e6a  [fix] pep8
#   8c72a22  [enh] api_key usage, disable the engine by default
#   b7d578a  [enh] engine header comments
#   983415b  [enh] is_valid_lang moved to utils
import re
from urllib import quote
from urlparse import urljoin
from lxml import html
from cgi import escape
from searx.engines.xpath import extract_text
from searx.utils import is_valid_lang

categories = ['general']
url = 'http://api.mymemory.translated.net/get?q={query}' \
      '&langpair={from_lang}|{to_lang}{key}'
web_url = 'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
weight = 100

# "<from>-<to> <text>", e.g. "en-hu hello world"; the text must be at least
# two characters long.
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
api_key = ''


def _quote(text):
    """Percent-encode *text* so it can be embedded in a URL.

    Unicode input is encoded as UTF-8 first: Python 2's ``urllib.quote``
    fails on non-ASCII unicode objects. Nothing is treated as "safe", so
    ``/``, ``&``, ``#`` etc. inside the text cannot alter the URL structure.
    """
    if isinstance(text, unicode):
        text = text.encode('utf-8')
    return quote(text, safe='')


def request(query, params):
    """Fill *params* with the MyMemory API request for a translation query.

    The query has to look like ``<from>-<to> <text>``. When it does not, or
    when either language is rejected by ``is_valid_lang``, *params* is
    returned untouched (no ``url`` is set by this function).

    On success the following keys are written:

    * ``url``       -- API URL with the percent-encoded text
    * ``query``     -- the text to translate (unicode, unencoded)
    * ``from_lang`` -- value returned by ``is_valid_lang`` for the source
    * ``to_lang``   -- value returned by ``is_valid_lang`` for the target
    """
    match = parser_re.match(unicode(query, 'utf8'))
    if not match:
        return params

    from_lang, to_lang, query = match.groups()

    # NOTE(review): the implementation that was moved to searx.utils returned
    # False or (True, two-letter code, lowercased language name); the indexes
    # used below rely on that shape -- confirm against searx.utils.
    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    key_form = '&key=' + api_key if api_key else ''

    # The text is percent-encoded before formatting: a raw '&' or '#' would
    # cut the query string short, and a non-ASCII unicode value formatted
    # into this byte-string template raises UnicodeEncodeError on Python 2.
    params['url'] = url.format(from_lang=from_lang[1],
                               to_lang=to_lang[1],
                               query=_quote(query),
                               key=key_form)
    params['query'] = query
    params['from_lang'] = from_lang
    params['to_lang'] = to_lang

    return params


def response(resp):
    """Turn the MyMemory JSON answer into a single search result.

    Reads ``from_lang``, ``to_lang`` and ``query`` from ``resp.search_params``
    (stored there by ``request``) and ``responseData.translatedText`` from the
    JSON body. Returns a one-element list with ``url``, ``title`` and
    ``content``, or an empty list when the answer carries no translated text.
    """
    search_params = resp.search_params
    from_lang = search_params['from_lang']
    to_lang = search_params['to_lang']
    query = search_params['query']

    # A missing or null translation must not reach escape(), which would
    # fail on None.
    response_data = resp.json().get('responseData') or {}
    translated = response_data.get('translatedText')
    if not translated:
        return []

    return [{
        'url': escape(web_url.format(from_lang=from_lang[2],
                                     to_lang=to_lang[2],
                                     query=_quote(query))),
        # unicode template: the query may contain non-ASCII characters
        'title': escape(u'[{0}-{1}] {2}'.format(from_lang[1],
                                                to_lang[1],
                                                query)),
        'content': escape(translated),
    }]