From 93c0c49e9aba719c8c8e6b171e0dd515a586d32b Mon Sep 17 00:00:00 2001 From: Noemi Vanyi Date: Sun, 17 Jul 2016 18:42:30 +0200 Subject: add time range search with yahoo --- searx/engines/__init__.py | 3 ++- searx/engines/yahoo.py | 33 +++++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 099baa587..2c735a188 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -42,7 +42,8 @@ engine_default_args = {'paging': False, 'shortcut': '-', 'disabled': False, 'suspend_end_time': 0, - 'continuous_errors': 0} + 'continuous_errors': 0, + 'time_range_support': False} def load_module(filename): diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index b8b40e4aa..2334614cb 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url categories = ['general'] paging = True language_support = True +time_range_support = True # search-url base_url = 'https://search.yahoo.com/' search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' +search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time' # specific xpath variables results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" @@ -32,6 +34,9 @@ title_xpath = './/h3/a' content_xpath = './/div[@class="compText aAbs"]' suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a" +time_range_dict = {'day': ['1d', 'd'], + 'week': ['1w', 'w'], + 'month': ['1m', 'm']} # remove yahoo-specific tracking-url def parse_url(url_string): @@ -51,18 +56,30 @@ def parse_url(url_string): return unquote(url_string[start:end]) +def _get_url(query, offset, language, time_range): + if time_range: + return base_url + search_url_with_time.format(offset=offset, + query=urlencode({'p': query}), + lang=language, + age=time_range_dict[time_range][0], + btf=time_range_dict[time_range][1]) + return base_url + search_url.format(offset=offset, + query=urlencode({'p': query}), + lang=language) + + +def _get_language(params): + if params['language'] == 'all': + return 'en' + return params['language'].split('_')[0] + + # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 + language = _get_language(params) - if params['language'] == 'all': - language = 'en' - else: - language = params['language'].split('_')[0] - - params['url'] = base_url + search_url.format(offset=offset, - query=urlencode({'p': query}), - lang=language) + params['url'] = _get_url(query, offset, language, params['time_range']) # TODO required? params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\ -- cgit v1.2.3 From 2e5839503f7a147e6759e124a3694d1e6b6631fd Mon Sep 17 00:00:00 2001 From: Noemi Vanyi Date: Mon, 18 Jul 2016 16:09:18 +0200 Subject: add time range search for google --- searx/engines/google.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/google.py b/searx/engines/google.py index fd5e7b54c..3b845cbcd 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -24,6 +24,7 @@ categories = ['general'] paging = True language_support = True use_locale_domain = True +time_range_support = True # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests default_hostname = 'www.google.com' @@ -92,6 +93,11 @@ search_url = ('https://{hostname}' + search_path + '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&ei=x') +time_range_search = "&tbs=qdr:{range}" +time_range_dict = {'day': 'd', + 'week': 'w', + 'month': 'm'} + # other URLs map_hostname_start = 'maps.google.' maps_path = '/maps' @@ -179,6 +185,8 @@ def request(query, params): query=urlencode({'q': query}), hostname=google_hostname, lang=url_lang) + if params['time_range']: + params['url'] += time_range_search.format(range=time_range_dict[params['time_range']]) params['headers']['Accept-Language'] = language params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -- cgit v1.2.3 From f13b9fa36ae51861b8c5a1e110895af0f8c5f555 Mon Sep 17 00:00:00 2001 From: Noemi Vanyi Date: Mon, 18 Jul 2016 16:15:37 +0200 Subject: add time range search for duckduckgo --- searx/engines/duckduckgo.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index d29e4416a..aa7a98754 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -22,9 +22,15 @@ from searx.languages import language_codes categories = ['general'] paging = True language_support = True +time_range_support = True # search-url url = 'https://duckduckgo.com/html?{query}&s={offset}' +time_range_url = '&df={range}' + +time_range_dict = {'day': 'd', + 'week': 'w', + 'month': 'm'} # specific xpath variables result_xpath = '//div[@class="result results_links results_links_deep web-result "]' # noqa @@ -61,6 +67,9 @@ def request(query, params): params['url'] = url.format( query=urlencode({'q': query}), offset=offset) + if params['time_range']: + params['url'] += time_range_url.format(range=time_range_dict[params['time_range']]) + return params -- cgit v1.2.3 From e9a78f1434806df7235f575db3d3267b5ae852fe Mon Sep 17 00:00:00 2001 From: Noemi Vanyi Date: Mon, 18 Jul 2016 17:25:40 +0200 Subject: add time range search for google images --- searx/engines/google_images.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'searx/engines') diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index efe46812a..eab0fba08 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -19,13 +19,17 @@ from lxml import html categories = ['images'] paging = True safesearch = True +time_range_support = True search_url = 'https://www.google.com/search'\ '?{query}'\ '&tbm=isch'\ '&ijn=1'\ '&start={offset}' - +time_range_search = "&tbs=qdr:{range}" +time_range_dict = {'day': 'd', + 'week': 'w', + 'month': 'm'} # do search-request def request(query, params): @@ -34,6 +38,8 @@ def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch) + if params['time_range']: + params['url'] += time_range_search.format(range=time_range_dict[params['time_range']]) if safesearch and params['safesearch']: params['url'] += '&' + urlencode({'safe': 'active'}) -- cgit v1.2.3 From e7baf24ec16366da000dc37b7e9e4377138ee0a2 Mon Sep 17 00:00:00 2001 From: Noemi Vanyi Date: Tue, 19 Jul 2016 10:06:47 +0200 Subject: add time range search for deviantart --- searx/engines/deviantart.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index 70761370c..ef1dd9e5f 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -21,10 +21,16 @@ from searx.engines.xpath import extract_text # engine dependent config categories = ['images'] paging = True +time_range_support = True # search-url base_url = 'https://www.deviantart.com/' search_url = base_url + 'browse/all/?offset={offset}&{query}' +time_range_url = '&order={range}' + +time_range_dict = {'day': 11, + 'week': 14, + 'month': 15} # do search-request @@ -33,6 +39,8 @@ def request(query, params): params['url'] = search_url.format(offset=offset, query=urlencode({'q': query})) + if params['time_range']: + params['url'] += time_range_url.format(range=time_range_dict[params['time_range']]) return params -- cgit v1.2.3 From a7c8d5882c137634b754e9757340de85029012e6 Mon Sep 17 00:00:00 2001 From: Noemi Vanyi Date: Tue, 19 Jul 2016 10:14:11 +0200 Subject: fix pep8 --- searx/engines/google_images.py | 1 + searx/engines/yahoo.py | 1 + 2 files changed, 2 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index eab0fba08..b687317c8 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -31,6 +31,7 @@ time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'} + # do search-request def request(query, params): offset = (params['pageno'] - 1) * 100 diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 2334614cb..82f782b9d 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -38,6 +38,7 @@ time_range_dict = {'day': ['1d', 'd'], 'week': ['1w', 'w'], 'month': ['1m', 'm']} + # remove yahoo-specific tracking-url def parse_url(url_string): endings = ['/RS', '/RK'] -- cgit v1.2.3