Merge pull request #634 from kvch/advanced-search

support time range search
author: Adam Tauber <asciimoo@gmail.com> 2016-07-26 00:06:16 +0200
committer: GitHub <noreply@github.com> 2016-07-26 00:06:16 +0200
commit: 7d9c898170df497036b8a7a70a1a5c86c3859670 (patch)
tree: ca1722a5d0d1bc8493e50bd334a07ea50e744e63 /searx/engines
parent: 54d987636e4b03d19a99ad9d143bf63b119af208 (diff)
parent: 90e74fbb288b2f1df0516d877d3bd239c7800412 (diff)
6 files changed, 60 insertions, 9 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 52823f1d2..782b622b0 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -42,7 +42,8 @@ engine_default_args = {'paging': False,
                        'shortcut': '-',
                        'disabled': False,
                        'suspend_end_time': 0,
-                       'continuous_errors': 0}
+                       'continuous_errors': 0,
+                       'time_range_support': False}
 
 
 def load_module(filename):
diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
index 70761370c..ef1dd9e5f 100644
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
@@ -21,10 +21,16 @@ from searx.engines.xpath import extract_text
 # engine dependent config
 categories = ['images']
 paging = True
+time_range_support = True
 
 # search-url
 base_url = 'https://www.deviantart.com/'
 search_url = base_url + 'browse/all/?offset={offset}&{query}'
+time_range_url = '&order={range}'
+
+time_range_dict = {'day': 11,
+                   'week': 14,
+                   'month': 15}
 
 
 # do search-request
@@ -33,6 +39,8 @@ def request(query, params):
 
     params['url'] = search_url.format(offset=offset,
                                       query=urlencode({'q': query}))
+    if params['time_range']:
+        params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
 
     return params
 
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index d29e4416a..aa7a98754 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -22,9 +22,15 @@ from searx.languages import language_codes
 categories = ['general']
 paging = True
 language_support = True
+time_range_support = True
 
 # search-url
 url = 'https://duckduckgo.com/html?{query}&s={offset}'
+time_range_url = '&df={range}'
+
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm'}
 
 # specific xpath variables
 result_xpath = '//div[@class="result results_links results_links_deep web-result "]'  # noqa
@@ -61,6 +67,9 @@ def request(query, params):
         params['url'] = url.format(
             query=urlencode({'q': query}), offset=offset)
 
+    if params['time_range']:
+        params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
+
     return params
 
 
diff --git a/searx/engines/google.py b/searx/engines/google.py
index fd5e7b54c..3b845cbcd 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -24,6 +24,7 @@ categories = ['general']
 paging = True
 language_support = True
 use_locale_domain = True
+time_range_support = True
 
 # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
 default_hostname = 'www.google.com'
@@ -92,6 +93,11 @@ search_url = ('https://{hostname}' +
               search_path +
               '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&ei=x')
 
+time_range_search = "&tbs=qdr:{range}"
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm'}
+
 # other URLs
 map_hostname_start = 'maps.google.'
 maps_path = '/maps'
@@ -179,6 +185,8 @@ def request(query, params):
                                       query=urlencode({'q': query}),
                                       hostname=google_hostname,
                                       lang=url_lang)
+    if params['time_range']:
+        params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])
 
     params['headers']['Accept-Language'] = language
     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index efe46812a..b687317c8 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -19,12 +19,17 @@ from lxml import html
 categories = ['images']
 paging = True
 safesearch = True
+time_range_support = True
 
 search_url = 'https://www.google.com/search'\
     '?{query}'\
     '&tbm=isch'\
     '&ijn=1'\
     '&start={offset}'
+time_range_search = "&tbs=qdr:{range}"
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm'}
 
 
 # do search-request
@@ -34,6 +39,8 @@ def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       offset=offset,
                                       safesearch=safesearch)
+    if params['time_range']:
+        params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])
 
     if safesearch and params['safesearch']:
         params['url'] += '&' + urlencode({'safe': 'active'})
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index b8b40e4aa..82f782b9d 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url
 categories = ['general']
 paging = True
 language_support = True
+time_range_support = True
 
 # search-url
 base_url = 'https://search.yahoo.com/'
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
+search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
 
 # specific xpath variables
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
@@ -32,6 +34,10 @@ title_xpath = './/h3/a'
 content_xpath = './/div[@class="compText aAbs"]'
 suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
 
+time_range_dict = {'day': ['1d', 'd'],
+                   'week': ['1w', 'w'],
+                   'month': ['1m', 'm']}
+
 
 # remove yahoo-specific tracking-url
 def parse_url(url_string):
@@ -51,18 +57,30 @@ def parse_url(url_string):
         return unquote(url_string[start:end])
 
 
+def _get_url(query, offset, language, time_range):
+    if time_range:
+        return base_url + search_url_with_time.format(offset=offset,
+                                                      query=urlencode({'p': query}),
+                                                      lang=language,
+                                                      age=time_range_dict[time_range][0],
+                                                      btf=time_range_dict[time_range][1])
+    return base_url + search_url.format(offset=offset,
+                                        query=urlencode({'p': query}),
+                                        lang=language)
+
+
+def _get_language(params):
+    if params['language'] == 'all':
+        return 'en'
+    return params['language'].split('_')[0]
+
+
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
+    language = _get_language(params)
 
-    if params['language'] == 'all':
-        language = 'en'
-    else:
-        language = params['language'].split('_')[0]
-
-    params['url'] = base_url + search_url.format(offset=offset,
-                                                 query=urlencode({'p': query}),
-                                                 lang=language)
+    params['url'] = _get_url(query, offset, language, params['time_range'])
 
     # TODO required?
     params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
author	Adam Tauber <asciimoo@gmail.com>	2016-07-26 00:06:16 +0200
committer	GitHub <noreply@github.com>	2016-07-26 00:06:16 +0200
commit	7d9c898170df497036b8a7a70a1a5c86c3859670 (patch)
tree	ca1722a5d0d1bc8493e50bd334a07ea50e744e63 /searx/engines
parent	54d987636e4b03d19a99ad9d143bf63b119af208 (diff)
parent	90e74fbb288b2f1df0516d877d3bd239c7800412 (diff)