summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--searx/autocomplete.py4
-rw-r--r--searx/engines/dailymotion.py3
-rw-r--r--searx/engines/deviantart.py8
-rw-r--r--searx/engines/digg.py3
-rw-r--r--searx/engines/gigablast.py2
-rw-r--r--searx/engines/google.py3
-rw-r--r--searx/engines/google_images.py3
-rw-r--r--searx/engines/twitter.py4
-rw-r--r--searx/engines/www1x.py2
-rw-r--r--searx/tests/engines/test_deviantart.py2
-rw-r--r--searx/tests/engines/test_google.py3
-rw-r--r--searx/tests/engines/test_google_images.py2
-rw-r--r--searx/utils.py11
13 files changed, 33 insertions, 17 deletions
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index 218d3fe76..f5775bc63 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -111,7 +111,7 @@ def searx_bang(full_query):
def dbpedia(query):
- # dbpedia autocompleter
+ # dbpedia autocompleter, no HTTPS
autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' # noqa
response = get(autocomplete_url
@@ -139,7 +139,7 @@ def duckduckgo(query):
def google(query):
# google autocompleter
- autocomplete_url = 'http://suggestqueries.google.com/complete/search?client=toolbar&' # noqa
+ autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&' # noqa
response = get(autocomplete_url
+ urlencode(dict(q=query)))
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 4b029205a..4eb894725 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -62,6 +62,9 @@ def response(resp):
publishedDate = datetime.fromtimestamp(res['created_time'], None)
embedded = embedded_url.format(videoid=res['id'])
+ # http to https
+ thumbnail = thumbnail.replace("http://", "https://")
+
results.append({'template': 'videos.html',
'url': url,
'title': title,
diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
index 6606215e8..60c8d7ea7 100644
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
@@ -24,7 +24,7 @@ paging = True
# search-url
base_url = 'https://www.deviantart.com/'
-search_url = base_url+'search?offset={offset}&{query}'
+search_url = base_url+'browse/all/?offset={offset}&{query}'
# do search-request
@@ -58,6 +58,12 @@ def response(resp):
thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
img_src = regex.sub('/', thumbnail_src)
+ # http to https, remove domain sharding
+ thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
+ thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
+
+ url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url)
+
# append result
results.append({'url': url,
'title': title,
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 8a635e6c3..000f66ba2 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -60,6 +60,9 @@ def response(resp):
pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
publishedDate = parser.parse(pubdate)
+ # http to https
+ thumbnail = thumbnail.replace("http://static.digg.com", "https://static.digg.com")
+
# append result
results.append({'url': url,
'title': title,
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index 242f93728..b852de9ba 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -19,7 +19,7 @@ categories = ['general']
paging = True
number_of_results = 5
-# search-url
+# search-url, invalid HTTPS certificate
base_url = 'http://gigablast.com/'
search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 9c768260a..807c58ed5 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -76,7 +76,8 @@ def request(query, params):
query=urlencode({'q': query}))
params['headers']['Accept-Language'] = language
- params['cookies']['PREF'] = get_google_pref_cookie()
+ if language.startswith('en'):
+ params['cookies']['PREF'] = get_google_pref_cookie()
return params
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 135b3e0af..85963a16f 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -58,6 +58,9 @@ def response(resp):
continue
thumbnail_src = result['tbUrl']
+ # http to https
+ thumbnail_src = thumbnail_src.replace("http://", "https://")
+
# append result
results.append({'url': href,
'title': title,
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 7407ecc95..a0ee18a47 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -29,8 +29,8 @@ search_url = base_url + 'search?'
# specific xpath variables
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
-title_xpath = './/span[@class="username js-action-profile-name"]'
-content_xpath = './/p[@class="js-tweet-text tweet-text"]'
+title_xpath = './/span[contains(@class, "username")]'
+content_xpath = './/p[contains(@class, "tweet-text")]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index bfb55e331..12868ad22 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -20,7 +20,7 @@ import re
categories = ['images']
paging = False
-# search-url
+# search-url, no HTTPS (there is a valid certificate for https://api2.1x.com/ )
base_url = 'http://1x.com'
search_url = base_url+'/backend/search.php?{query}'
diff --git a/searx/tests/engines/test_deviantart.py b/searx/tests/engines/test_deviantart.py
index 9cf68d0b8..78a391334 100644
--- a/searx/tests/engines/test_deviantart.py
+++ b/searx/tests/engines/test_deviantart.py
@@ -75,7 +75,7 @@ class TestDeviantartEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'Title of image')
self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url')
self.assertNotIn('content', results[0])
- self.assertEqual(results[0]['thumbnail_src'], 'http://url.of.thumbnail')
+ self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')
html = """
<span class="tt-fh-tc" style="width: 202px;">
diff --git a/searx/tests/engines/test_google.py b/searx/tests/engines/test_google.py
index 2c3d8e5f6..2a90fc5ec 100644
--- a/searx/tests/engines/test_google.py
+++ b/searx/tests/engines/test_google.py
@@ -17,12 +17,13 @@ class TestGoogleEngine(SearxTestCase):
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('google.com', params['url'])
- self.assertIn('PREF', params['cookies'])
+ self.assertNotIn('PREF', params['cookies'])
self.assertIn('fr', params['headers']['Accept-Language'])
dicto['language'] = 'all'
params = google.request(query, dicto)
self.assertIn('en', params['headers']['Accept-Language'])
+ self.assertIn('PREF', params['cookies'])
def test_response(self):
self.assertRaises(AttributeError, google.response, None)
diff --git a/searx/tests/engines/test_google_images.py b/searx/tests/engines/test_google_images.py
index 32d133334..9bef692d4 100644
--- a/searx/tests/engines/test_google_images.py
+++ b/searx/tests/engines/test_google_images.py
@@ -65,7 +65,7 @@ class TestGoogleImagesEngine(SearxTestCase):
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], 'http://this.is.the.url')
- self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.url')
+ self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url')
self.assertEqual(results[0]['img_src'], 'http://image.url.jpg')
self.assertEqual(results[0]['content'], '<b>test</b>')
diff --git a/searx/utils.py b/searx/utils.py
index e6c107e24..129971e31 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -17,17 +17,16 @@ from searx import logger
logger = logger.getChild('utils')
-ua_versions = ('31.0',
- '32.0',
- '33.0',
+ua_versions = ('33.0',
'34.0',
- '35.0')
+ '35.0',
+ '36.0',
+ '37.0')
ua_os = ('Windows NT 6.3; WOW64',
'X11; Linux x86_64',
'X11; Linux x86')
-
-ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"
+ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
blocked_tags = ('script',
'style')