diff options
| author | asciimoo <asciimoo@gmail.com> | 2014-01-31 04:35:23 +0100 |
|---|---|---|
| committer | asciimoo <asciimoo@gmail.com> | 2014-01-31 04:35:23 +0100 |
| commit | 2a788c8f29f63bc069436f1a12343a47d66f2523 (patch) | |
| tree | cc44113f213cd565797d3c9a87e8afe72ccf5539 /searx/engines | |
| parent | 90a93422865b81ce256ab02a367a4d3529eb2d18 (diff) | |
[enh] search language support init
Diffstat (limited to 'searx/engines')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | searx/engines/__init__.py | 12 |
| -rw-r--r-- | searx/engines/bing.py | 12 |
| -rw-r--r-- | searx/engines/google.py | 13 |
| -rw-r--r-- | searx/engines/wikipedia.py | 30 |
4 files changed, 59 insertions, 8 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 73a63f0a3..ac0c13c76 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -53,8 +53,14 @@ if not 'engines' in settings or not settings['engines']: for engine_data in settings['engines']: engine_name = engine_data['engine'] engine = load_module(engine_name + '.py') + if not hasattr(engine, 'paging'): engine.paging = False + + if not hasattr(engine, 'language_support'): + #engine.language_support = False + engine.language_support = True + for param_name in engine_data: if param_name == 'engine': continue @@ -158,7 +164,7 @@ def score_results(results): return sorted(results, key=itemgetter('score'), reverse=True) -def search(query, request, selected_engines, pageno=1): +def search(query, request, selected_engines, pageno=1, lang='all'): global engines, categories, number_of_searches requests = [] results = {} @@ -176,11 +182,15 @@ def search(query, request, selected_engines, pageno=1): if pageno > 1 and not engine.paging: continue + if lang != 'all' and not engine.language_support: + continue + request_params = default_request_params() request_params['headers']['User-Agent'] = user_agent request_params['category'] = selected_engine['category'] request_params['started'] = datetime.now() request_params['pageno'] = pageno + request_params['language'] = lang request_params = engine.request(query, request_params) callback = make_callback( diff --git a/searx/engines/bing.py b/searx/engines/bing.py index ec8a0c44c..00f66b118 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -4,16 +4,22 @@ from cgi import escape base_url = 'http://www.bing.com/' search_string = 'search?{query}&first={offset}' -locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx - paging = True +language_support = True def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 + if params['language'] == 'all': + language = 'en-US' + else: + language = 
params['language'].replace('_', '-') search_path = search_string.format( - query=urlencode({'q': query, 'setmkt': locale}), + query=urlencode({'q': query, 'setmkt': language}), offset=offset) + + params['cookies']['SRCHHPGUSR'] = \ + 'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] #if params['category'] == 'images': # params['url'] = base_url + 'images/' + search_path params['url'] = base_url + search_path diff --git a/searx/engines/google.py b/searx/engines/google.py index b90c7adc5..2c6a98af3 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -5,16 +5,21 @@ from json import loads categories = ['general'] -paging = True - url = 'https://ajax.googleapis.com/' -search_url = url + 'ajax/services/search/web?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}' # noqa +search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa + +paging = True +language_support = True def request(query, params): offset = (params['pageno'] - 1) * 8 + language = 'en-US' + if params['language'] != 'all': + language = params['language'].replace('_', '-') params['url'] = search_url.format(offset=offset, - query=urlencode({'q': query})) + query=urlencode({'q': query}), + language=language) return params diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py new file mode 100644 index 000000000..1e2a798cc --- /dev/null +++ b/searx/engines/wikipedia.py @@ -0,0 +1,30 @@ +from json import loads +from urllib import urlencode, quote + +url = 'https://{language}.wikipedia.org/' + +search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json&sroffset={offset}' # noqa + +number_of_results = 10 + +language_support = True + + +def request(query, params): + offset = (params['pageno'] - 1) * 10 + if params['language'] == 'all': + language = 'en' + else: + language = params['language'].split('_')[0] + params['language'] = language + params['url'] = 
search_url.format(query=urlencode({'srsearch': query}), + offset=offset, + language=language) + return params + + +def response(resp): + search_results = loads(resp.text) + res = search_results.get('query', {}).get('search', []) + return [{'url': url.format(language=resp.search_params['language']) + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')), # noqa + 'title': result['title']} for result in res[:int(number_of_results)]] |