summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
Diffstat (limited to 'searx')
-rw-r--r--searx/autocomplete.py6
-rw-r--r--searx/engines/__init__.py4
-rw-r--r--searx/engines/bing.py26
-rw-r--r--searx/engines/bing_images.py32
-rw-r--r--searx/engines/bing_news.py24
-rw-r--r--searx/engines/blekko_images.py20
-rw-r--r--searx/engines/btdigg.py20
-rw-r--r--searx/engines/dailymotion.py29
-rw-r--r--searx/engines/deezer.py20
-rw-r--r--searx/engines/deviantart.py32
-rw-r--r--searx/engines/digg.py23
-rw-r--r--searx/engines/duckduckgo.py30
-rw-r--r--searx/engines/dummy.py10
-rw-r--r--searx/engines/faroo.py20
-rw-r--r--searx/engines/flickr.py26
-rw-r--r--searx/engines/flickr_noapi.py24
-rw-r--r--searx/engines/generalfile.py24
-rw-r--r--searx/engines/gigablast.py22
-rw-r--r--searx/engines/github.py22
-rw-r--r--searx/engines/google.py3
-rw-r--r--searx/engines/google_images.py25
-rw-r--r--searx/engines/google_news.py22
-rw-r--r--searx/engines/json_engine.py2
-rw-r--r--searx/engines/kickass.py20
-rw-r--r--searx/engines/mediawiki.py24
-rw-r--r--searx/engines/mixcloud.py20
-rw-r--r--searx/engines/openstreetmap.py20
-rw-r--r--searx/engines/photon.py20
-rw-r--r--searx/engines/searchcode_code.py20
-rw-r--r--searx/engines/searchcode_doc.py20
-rw-r--r--searx/engines/soundcloud.py20
-rw-r--r--searx/engines/spotify.py20
-rw-r--r--searx/engines/stackoverflow.py20
-rw-r--r--searx/engines/subtitleseeker.py20
-rw-r--r--searx/engines/twitter.py28
-rw-r--r--searx/engines/www1x.py21
-rw-r--r--searx/engines/www500px.py24
-rw-r--r--searx/engines/yacy.py2
-rw-r--r--searx/engines/yahoo.py22
-rw-r--r--searx/engines/youtube.py4
-rw-r--r--searx/plugins/search_on_category_select.py3
-rw-r--r--searx/testing.py4
-rw-r--r--searx/tests/engines/test_deviantart.py2
-rw-r--r--searx/tests/engines/test_google.py3
-rw-r--r--searx/tests/engines/test_google_images.py2
-rw-r--r--searx/utils.py11
46 files changed, 450 insertions, 366 deletions
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index 83e204890..f5775bc63 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -28,7 +28,7 @@ from searx.poolrequests import get as http_get
def get(*args, **kwargs):
- if not 'timeout' in kwargs:
+ if 'timeout' not in kwargs:
kwargs['timeout'] = settings['server']['request_timeout']
return http_get(*args, **kwargs)
@@ -111,7 +111,7 @@ def searx_bang(full_query):
def dbpedia(query):
- # dbpedia autocompleter
+ # dbpedia autocompleter, no HTTPS
autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' # noqa
response = get(autocomplete_url
@@ -139,7 +139,7 @@ def duckduckgo(query):
def google(query):
# google autocompleter
- autocomplete_url = 'http://suggestqueries.google.com/complete/search?client=toolbar&' # noqa
+ autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&' # noqa
response = get(autocomplete_url
+ urlencode(dict(q=query)))
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 21a307501..18a45d851 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -86,7 +86,7 @@ def load_engine(engine_data):
continue
if getattr(engine, engine_attr) is None:
logger.error('Missing engine config attribute: "{0}.{1}"'
- .format(engine.name, engine_attr))
+ .format(engine.name, engine_attr))
sys.exit(1)
engine.stats = {
@@ -106,7 +106,7 @@ def load_engine(engine_data):
if engine.shortcut:
if engine.shortcut in engine_shortcuts:
logger.error('Engine config error: ambigious shortcut: {0}'
- .format(engine.shortcut))
+ .format(engine.shortcut))
sys.exit(1)
engine_shortcuts[engine.shortcut] = engine.name
return engine
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index f9c323d05..c72e6aeff 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -1,15 +1,17 @@
-## Bing (Web)
-#
-# @website https://www.bing.com
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-# max. 5000 query/month
-#
-# @using-api no (because of query limit)
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content
-#
-# @todo publishedDate
+"""
+ Bing (Web)
+
+ @website https://www.bing.com
+ @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+ max. 5000 query/month
+
+ @using-api no (because of query limit)
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content
+
+ @todo publishedDate
+"""
from urllib import urlencode
from cgi import escape
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index d4e3d2b2b..b06a57edc 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -1,17 +1,19 @@
-## Bing (Images)
-#
-# @website https://www.bing.com/images
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-# max. 5000 query/month
-#
-# @using-api no (because of query limit)
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, img_src
-#
-# @todo currently there are up to 35 images receive per page,
-# because bing does not parse count=10.
-# limited response to 10 images
+"""
+ Bing (Images)
+
+ @website https://www.bing.com/images
+ @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+ max. 5000 query/month
+
+ @using-api no (because of query limit)
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, img_src
+
+ @todo currently there are up to 35 images received per page,
+ because bing does not parse count=10.
+ limited response to 10 images
+"""
from urllib import urlencode
from lxml import html
@@ -76,7 +78,7 @@ def response(resp):
title = link.attrib.get('t1')
ihk = link.attrib.get('ihk')
- #url = 'http://' + link.attrib.get('t3')
+ # url = 'http://' + link.attrib.get('t3')
url = yaml_data.get('surl')
img_src = yaml_data.get('imgurl')
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index e6adb2644..1e5d361c1 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -1,13 +1,15 @@
-## Bing (News)
-#
-# @website https://www.bing.com/news
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-# max. 5000 query/month
-#
-# @using-api no (because of query limit)
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content, publishedDate
+"""
+ Bing (News)
+
+ @website https://www.bing.com/news
+ @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+ max. 5000 query/month
+
+ @using-api no (because of query limit)
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content, publishedDate
+"""
from urllib import urlencode
from cgi import escape
@@ -87,6 +89,8 @@ def response(resp):
publishedDate = parser.parse(publishedDate, dayfirst=False)
except TypeError:
publishedDate = datetime.now()
+ except ValueError:
+ publishedDate = datetime.now()
# append result
results.append({'url': url,
diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py
index 2e7ec904f..93ac6616b 100644
--- a/searx/engines/blekko_images.py
+++ b/searx/engines/blekko_images.py
@@ -1,12 +1,14 @@
-## Blekko (Images)
-#
-# @website https://blekko.com
-# @provide-api yes (inofficial)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, img_src
+"""
+ Blekko (Images)
+
+ @website https://blekko.com
+ @provide-api yes (unofficial)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, img_src
+"""
from json import loads
from urllib import urlencode
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
index 944250628..bde866146 100644
--- a/searx/engines/btdigg.py
+++ b/searx/engines/btdigg.py
@@ -1,12 +1,14 @@
-## BTDigg (Videos, Music, Files)
-#
-# @website https://btdigg.org
-# @provide-api yes (on demand)
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content, seed, leech, magnetlink
+"""
+ BTDigg (Videos, Music, Files)
+
+ @website https://btdigg.org
+ @provide-api yes (on demand)
+
+ @using-api no
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content, seed, leech, magnetlink
+"""
from urlparse import urljoin
from cgi import escape
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 03b1dbb8b..4eb894725 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -1,14 +1,16 @@
-## Dailymotion (Videos)
-#
-# @website https://www.dailymotion.com
-# @provide-api yes (http://www.dailymotion.com/developer)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, thumbnail, publishedDate, embedded
-#
-# @todo set content-parameter with correct data
+"""
+ Dailymotion (Videos)
+
+ @website https://www.dailymotion.com
+ @provide-api yes (http://www.dailymotion.com/developer)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, thumbnail, publishedDate, embedded
+
+ @todo set content-parameter with correct data
+"""
from urllib import urlencode
from json import loads
@@ -48,7 +50,7 @@ def response(resp):
search_res = loads(resp.text)
# return empty array if there are no results
- if not 'list' in search_res:
+ if 'list' not in search_res:
return []
# parse results
@@ -60,6 +62,9 @@ def response(resp):
publishedDate = datetime.fromtimestamp(res['created_time'], None)
embedded = embedded_url.format(videoid=res['id'])
+ # http to https
+ thumbnail = thumbnail.replace("http://", "https://")
+
results.append({'template': 'videos.html',
'url': url,
'title': title,
diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py
index 7fbd3c200..0530bc072 100644
--- a/searx/engines/deezer.py
+++ b/searx/engines/deezer.py
@@ -1,12 +1,14 @@
-## Deezer (Music)
-#
-# @website https://deezer.com
-# @provide-api yes (http://developers.deezer.com/api/)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content, embedded
+"""
+ Deezer (Music)
+
+ @website https://deezer.com
+ @provide-api yes (http://developers.deezer.com/api/)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content, embedded
+"""
from json import loads
from urllib import urlencode
diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
index 4198e8c76..60c8d7ea7 100644
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
@@ -1,14 +1,16 @@
-## Deviantart (Images)
-#
-# @website https://www.deviantart.com/
-# @provide-api yes (https://www.deviantart.com/developers/) (RSS)
-#
-# @using-api no (TODO, rewrite to api)
-# @results HTML
-# @stable no (HTML can change)
-# @parse url, title, thumbnail_src, img_src
-#
-# @todo rewrite to api
+"""
+ Deviantart (Images)
+
+ @website https://www.deviantart.com/
+ @provide-api yes (https://www.deviantart.com/developers/) (RSS)
+
+ @using-api no (TODO, rewrite to api)
+ @results HTML
+ @stable no (HTML can change)
+ @parse url, title, thumbnail_src, img_src
+
+ @todo rewrite to api
+"""
from urllib import urlencode
from urlparse import urljoin
@@ -22,7 +24,7 @@ paging = True
# search-url
base_url = 'https://www.deviantart.com/'
-search_url = base_url+'search?offset={offset}&{query}'
+search_url = base_url+'browse/all/?offset={offset}&{query}'
# do search-request
@@ -56,6 +58,12 @@ def response(resp):
thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
img_src = regex.sub('/', thumbnail_src)
+ # http to https, remove domain sharding
+ thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
+ thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
+
+ url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url)
+
# append result
results.append({'url': url,
'title': title,
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 1b5f2c8e4..000f66ba2 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -1,12 +1,14 @@
-## Digg (News, Social media)
-#
-# @website https://digg.com/
-# @provide-api no
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content, publishedDate, thumbnail
+"""
+ Digg (News, Social media)
+
+ @website https://digg.com/
+ @provide-api no
+
+ @using-api no
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content, publishedDate, thumbnail
+"""
from urllib import quote_plus
from json import loads
@@ -58,6 +60,9 @@ def response(resp):
pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
publishedDate = parser.parse(pubdate)
+ # http to https
+ thumbnail = thumbnail.replace("http://static.digg.com", "https://static.digg.com")
+
# append result
results.append({'url': url,
'title': title,
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index e35a6334c..4ac2099ae 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -1,17 +1,19 @@
-## DuckDuckGo (Web)
-#
-# @website https://duckduckgo.com/
-# @provide-api yes (https://duckduckgo.com/api),
-# but not all results from search-site
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content
-#
-# @todo rewrite to api
-# @todo language support
-# (the current used site does not support language-change)
+"""
+ DuckDuckGo (Web)
+
+ @website https://duckduckgo.com/
+ @provide-api yes (https://duckduckgo.com/api),
+ but not all results from search-site
+
+ @using-api no
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content
+
+ @todo rewrite to api
+ @todo language support
+ (the current used site does not support language-change)
+"""
from urllib import urlencode
from lxml.html import fromstring
diff --git a/searx/engines/dummy.py b/searx/engines/dummy.py
index c60b7a5d2..50b56ef78 100644
--- a/searx/engines/dummy.py
+++ b/searx/engines/dummy.py
@@ -1,7 +1,9 @@
-## Dummy
-#
-# @results empty array
-# @stable yes
+"""
+ Dummy
+
+ @results empty array
+ @stable yes
+"""
# do search-request
diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py
index 4a5e60a60..43df14eef 100644
--- a/searx/engines/faroo.py
+++ b/searx/engines/faroo.py
@@ -1,12 +1,14 @@
-## Faroo (Web, News)
-#
-# @website http://www.faroo.com
-# @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content, publishedDate, img_src
+"""
+ Faroo (Web, News)
+
+ @website http://www.faroo.com
+ @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content, publishedDate, img_src
+"""
from urllib import urlencode
from json import loads
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index 4040236e1..68d45bc17 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -1,15 +1,17 @@
#!/usr/bin/env python
-## Flickr (Images)
-#
-# @website https://www.flickr.com
-# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, thumbnail, img_src
-#More info on api-key : https://www.flickr.com/services/apps/create/
+"""
+ Flickr (Images)
+
+ @website https://www.flickr.com
+ @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, thumbnail, img_src
+ More info on api-key : https://www.flickr.com/services/apps/create/
+"""
from urllib import urlencode
from json import loads
@@ -48,10 +50,10 @@ def response(resp):
search_results = loads(resp.text)
# return empty array if there are no results
- if not 'photos' in search_results:
+ if 'photos' not in search_results:
return []
- if not 'photo' in search_results['photos']:
+ if 'photo' not in search_results['photos']:
return []
photos = search_results['photos']['photo']
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
index 3a83fdc65..2071b8e36 100644
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -1,14 +1,16 @@
#!/usr/bin/env python
-# Flickr (Images)
-#
-# @website https://www.flickr.com
-# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-#
-# @using-api no
-# @results HTML
-# @stable no
-# @parse url, title, thumbnail, img_src
+"""
+ Flickr (Images)
+
+ @website https://www.flickr.com
+ @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
+
+ @using-api no
+ @results HTML
+ @stable no
+ @parse url, title, thumbnail, img_src
+"""
from urllib import urlencode
from json import loads
@@ -20,8 +22,8 @@ logger = logger.getChild('flickr-noapi')
categories = ['images']
-url = 'https://secure.flickr.com/'
-search_url = url + 'search/?{query}&page={page}'
+url = 'https://www.flickr.com/'
+search_url = url + 'search?{query}&page={page}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
diff --git a/searx/engines/generalfile.py b/searx/engines/generalfile.py
index b7d716298..3bb27444f 100644
--- a/searx/engines/generalfile.py
+++ b/searx/engines/generalfile.py
@@ -1,14 +1,16 @@
-## General Files (Files)
-#
-# @website http://www.general-files.org
-# @provide-api no (nothing found)
-#
-# @using-api no (because nothing found)
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content
-#
-# @todo detect torrents?
+"""
+ General Files (Files)
+
+ @website http://www.general-files.org
+ @provide-api no (nothing found)
+
+ @using-api no (because nothing found)
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content
+
+ @todo detect torrents?
+"""
from lxml import html
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index 8749c3256..b852de9ba 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -1,12 +1,14 @@
-## Gigablast (Web)
-#
-# @website http://gigablast.com
-# @provide-api yes (http://gigablast.com/api.html)
-#
-# @using-api yes
-# @results XML
-# @stable yes
-# @parse url, title, content
+"""
+ Gigablast (Web)
+
+ @website http://gigablast.com
+ @provide-api yes (http://gigablast.com/api.html)
+
+ @using-api yes
+ @results XML
+ @stable yes
+ @parse url, title, content
+"""
from urllib import urlencode
from cgi import escape
@@ -17,7 +19,7 @@ categories = ['general']
paging = True
number_of_results = 5
-# search-url
+# search-url, invalid HTTPS certificate
base_url = 'http://gigablast.com/'
search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
diff --git a/searx/engines/github.py b/searx/engines/github.py
index a68aed141..cc1fc470c 100644
--- a/searx/engines/github.py
+++ b/searx/engines/github.py
@@ -1,12 +1,14 @@
-## Github (It)
-#
-# @website https://github.com/
-# @provide-api yes (https://developer.github.com/v3/)
-#
-# @using-api yes
-# @results JSON
-# @stable yes (using api)
-# @parse url, title, content
+"""
+ Github (It)
+
+ @website https://github.com/
+ @provide-api yes (https://developer.github.com/v3/)
+
+ @using-api yes
+ @results JSON
+ @stable yes (using api)
+ @parse url, title, content
+"""
from urllib import urlencode
from json import loads
@@ -37,7 +39,7 @@ def response(resp):
search_res = loads(resp.text)
# check if items are recieved
- if not 'items' in search_res:
+ if 'items' not in search_res:
return []
# parse results
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 9c768260a..807c58ed5 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -76,7 +76,8 @@ def request(query, params):
query=urlencode({'q': query}))
params['headers']['Accept-Language'] = language
- params['cookies']['PREF'] = get_google_pref_cookie()
+ if language.startswith('en'):
+ params['cookies']['PREF'] = get_google_pref_cookie()
return params
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 1c0e62f5c..85963a16f 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -1,13 +1,15 @@
-## Google (Images)
-#
-# @website https://www.google.com
-# @provide-api yes (https://developers.google.com/web-search/docs/),
-# deprecated!
-#
-# @using-api yes
-# @results JSON
-# @stable yes (but deprecated)
-# @parse url, title, img_src
+"""
+ Google (Images)
+
+ @website https://www.google.com
+ @provide-api yes (https://developers.google.com/web-search/docs/),
+ deprecated!
+
+ @using-api yes
+ @results JSON
+ @stable yes (but deprecated)
+ @parse url, title, img_src
+"""
from urllib import urlencode, unquote
from json import loads
@@ -56,6 +58,9 @@ def response(resp):
continue
thumbnail_src = result['tbUrl']
+ # http to https
+ thumbnail_src = thumbnail_src.replace("http://", "https://")
+
# append result
results.append({'url': href,
'title': title,
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 3e4371b99..95d15cfb9 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -1,13 +1,15 @@
-## Google (News)
-#
-# @website https://www.google.com
-# @provide-api yes (https://developers.google.com/web-search/docs/),
-# deprecated!
-#
-# @using-api yes
-# @results JSON
-# @stable yes (but deprecated)
-# @parse url, title, content, publishedDate
+"""
+ Google (News)
+
+ @website https://www.google.com
+ @provide-api yes (https://developers.google.com/web-search/docs/),
+ deprecated!
+
+ @using-api yes
+ @results JSON
+ @stable yes (but deprecated)
+ @parse url, title, content, publishedDate
+"""
from urllib import urlencode
from json import loads
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index 708b999f8..5525b7f7e 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -6,7 +6,7 @@ search_url = None
url_query = None
content_query = None
title_query = None
-#suggestion_xpath = ''
+# suggestion_xpath = ''
def iterate(iterable):
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index 9c4639c32..4c5d24008 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -1,12 +1,14 @@
-## Kickass Torrent (Videos, Music, Files)
-#
-# @website https://kickass.so
-# @provide-api no (nothing found)
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable yes (HTML can change)
-# @parse url, title, content, seed, leech, magnetlink
+"""
+ Kickass Torrent (Videos, Music, Files)
+
+ @website https://kickass.so
+ @provide-api no (nothing found)
+
+ @using-api no
+ @results HTML (using search portal)
+ @stable yes (HTML can change)
+ @parse url, title, content, seed, leech, magnetlink
+"""
from urlparse import urljoin
from cgi import escape
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index 8ca32c62a..9fb72e830 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -1,14 +1,16 @@
-## general mediawiki-engine (Web)
-#
-# @website websites built on mediawiki (https://www.mediawiki.org)
-# @provide-api yes (http://www.mediawiki.org/wiki/API:Search)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title
-#
-# @todo content
+"""
+ general mediawiki-engine (Web)
+
+ @website websites built on mediawiki (https://www.mediawiki.org)
+ @provide-api yes (http://www.mediawiki.org/wiki/API:Search)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title
+
+ @todo content
+"""
from json import loads
from string import Formatter
diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py
index 918ae2908..312d297eb 100644
--- a/searx/engines/mixcloud.py
+++ b/searx/engines/mixcloud.py
@@ -1,12 +1,14 @@
-## Mixcloud (Music)
-#
-# @website https://http://www.mixcloud.com/
-# @provide-api yes (http://www.mixcloud.com/developers/
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content, embedded, publishedDate
+"""
+ Mixcloud (Music)
+
+ @website https://www.mixcloud.com/
+ @provide-api yes (http://www.mixcloud.com/developers/)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content, embedded, publishedDate
+"""
from json import loads
from urllib import urlencode
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 60c3c13ca..38baaada9 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -1,12 +1,14 @@
-## OpenStreetMap (Map)
-#
-# @website https://openstreetmap.org/
-# @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title
+"""
+ OpenStreetMap (Map)
+
+ @website https://openstreetmap.org/
+ @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title
+"""
from json import loads
from searx.utils import searx_useragent
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 869916cd4..2197005e5 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -1,12 +1,14 @@
-## Photon (Map)
-#
-# @website https://photon.komoot.de
-# @provide-api yes (https://photon.komoot.de/)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title
+"""
+ Photon (Map)
+
+ @website https://photon.komoot.de
+ @provide-api yes (https://photon.komoot.de/)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title
+"""
from urllib import urlencode
from json import loads
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index f276697b1..21d9c4ac2 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -1,12 +1,14 @@
-## Searchcode (It)
-#
-# @website https://searchcode.com/
-# @provide-api yes (https://searchcode.com/api/)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content
+"""
+ Searchcode (It)
+
+ @website https://searchcode.com/
+ @provide-api yes (https://searchcode.com/api/)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content
+"""
from urllib import urlencode
from json import loads
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
index 76da8d752..582b98d79 100644
--- a/searx/engines/searchcode_doc.py
+++ b/searx/engines/searchcode_doc.py
@@ -1,12 +1,14 @@
-## Searchcode (It)
-#
-# @website https://searchcode.com/
-# @provide-api yes (https://searchcode.com/api/)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content
+"""
+ Searchcode (It)
+
+ @website https://searchcode.com/
+ @provide-api yes (https://searchcode.com/api/)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content
+"""
from urllib import urlencode
from json import loads
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index 44374af6f..46e17fc81 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -1,12 +1,14 @@
-## Soundcloud (Music)
-#
-# @website https://soundcloud.com
-# @provide-api yes (https://developers.soundcloud.com/)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content, publishedDate, embedded
+"""
+ Soundcloud (Music)
+
+ @website https://soundcloud.com
+ @provide-api yes (https://developers.soundcloud.com/)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content, publishedDate, embedded
+"""
from json import loads
from urllib import urlencode, quote_plus
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py
index 61f3721ec..f75796e83 100644
--- a/searx/engines/spotify.py
+++ b/searx/engines/spotify.py
@@ -1,12 +1,14 @@
-## Spotify (Music)
-#
-# @website https://spotify.com
-# @provide-api yes (https://developer.spotify.com/web-api/search-item/)
-#
-# @using-api yes
-# @results JSON
-# @stable yes
-# @parse url, title, content, embedded
+"""
+ Spotify (Music)
+
+ @website https://spotify.com
+ @provide-api yes (https://developer.spotify.com/web-api/search-item/)
+
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content, embedded
+"""
from json import loads
from urllib import urlencode
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index 95ab58c54..34ecabae7 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -1,12 +1,14 @@
-## Stackoverflow (It)
-#
-# @website https://stackoverflow.com/
-# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
-#
-# @using-api no
-# @results HTML
-# @stable no (HTML can change)
-# @parse url, title, content
+"""
+ Stackoverflow (It)
+
+ @website https://stackoverflow.com/
+ @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
+
+ @using-api no
+ @results HTML
+ @stable no (HTML can change)
+ @parse url, title, content
+"""
from urlparse import urljoin
from cgi import escape
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index acefe30ea..47d27d0b2 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -1,12 +1,14 @@
-## Subtitleseeker (Video)
-#
-# @website http://www.subtitleseeker.com
-# @provide-api no
-#
-# @using-api no
-# @results HTML
-# @stable no (HTML can change)
-# @parse url, title, content
+"""
+ Subtitleseeker (Video)
+
+ @website http://www.subtitleseeker.com
+ @provide-api no
+
+ @using-api no
+ @results HTML
+ @stable no (HTML can change)
+ @parse url, title, content
+"""
from cgi import escape
from urllib import quote_plus
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 0e35e6188..a0ee18a47 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -1,14 +1,16 @@
-## Twitter (Social media)
-#
-# @website https://twitter.com/
-# @provide-api yes (https://dev.twitter.com/docs/using-search)
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content
-#
-# @todo publishedDate
+"""
+ Twitter (Social media)
+
+ @website https://twitter.com/
+ @provide-api yes (https://dev.twitter.com/docs/using-search)
+
+ @using-api no
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content
+
+ @todo publishedDate
+"""
from urlparse import urljoin
from urllib import urlencode
@@ -27,8 +29,8 @@ search_url = base_url + 'search?'
# specific xpath variables
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
-title_xpath = './/span[@class="username js-action-profile-name"]'
-content_xpath = './/p[@class="js-tweet-text tweet-text"]'
+title_xpath = './/span[contains(@class, "username")]'
+content_xpath = './/p[contains(@class, "tweet-text")]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index a68c105ce..12868ad22 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -1,13 +1,14 @@
-## 1x (Images)
-#
-# @website http://1x.com/
-# @provide-api no
-#
-# @using-api no
-# @results HTML
-# @stable no (HTML can change)
-# @parse url, title, thumbnail, img_src, content
+"""
+ 1x (Images)
+ @website http://1x.com/
+ @provide-api no
+
+ @using-api no
+ @results HTML
+ @stable no (HTML can change)
+ @parse url, title, thumbnail, img_src, content
+"""
from urllib import urlencode
from urlparse import urljoin
@@ -19,7 +20,7 @@ import re
categories = ['images']
paging = False
-# search-url
+# search-url, no HTTPS (there is a valid certificate for https://api2.1x.com/ )
base_url = 'http://1x.com'
search_url = base_url+'/backend/search.php?{query}'
diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py
index 99dba4abf..c98e19443 100644
--- a/searx/engines/www500px.py
+++ b/searx/engines/www500px.py
@@ -1,14 +1,16 @@
-## 500px (Images)
-#
-# @website https://500px.com
-# @provide-api yes (https://developers.500px.com/)
-#
-# @using-api no
-# @results HTML
-# @stable no (HTML can change)
-# @parse url, title, thumbnail, img_src, content
-#
-# @todo rewrite to api
+"""
+ 500px (Images)
+
+ @website https://500px.com
+ @provide-api yes (https://developers.500px.com/)
+
+ @using-api no
+ @results HTML
+ @stable no (HTML can change)
+ @parse url, title, thumbnail, img_src, content
+
+ @todo rewrite to api
+"""
from urllib import urlencode
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 3d26c9cc4..c2f1bc7ef 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -1,4 +1,4 @@
-## Yacy (Web, Images, Videos, Music, Files)
+# Yacy (Web, Images, Videos, Music, Files)
#
# @website http://yacy.net
# @provide-api yes
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index 11663a415..769e7e47f 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -1,13 +1,15 @@
-## Yahoo (Web)
-#
-# @website https://search.yahoo.com/web
-# @provide-api yes (https://developer.yahoo.com/boss/search/),
-# $0.80/1000 queries
-#
-# @using-api no (because pricing)
-# @results HTML (using search portal)
-# @stable no (HTML can change)
-# @parse url, title, content, suggestion
+"""
+ Yahoo (Web)
+
+ @website https://search.yahoo.com/web
+ @provide-api yes (https://developer.yahoo.com/boss/search/),
+ $0.80/1000 queries
+
+ @using-api no (because pricing)
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content, suggestion
+"""
from urllib import urlencode
from urlparse import unquote
diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py
index 1375538a8..c77cd2d0e 100644
--- a/searx/engines/youtube.py
+++ b/searx/engines/youtube.py
@@ -1,4 +1,4 @@
-## Youtube (Videos)
+# Youtube (Videos)
#
# @website https://www.youtube.com/
# @provide-api yes (http://gdata-samples-youtube-search-py.appspot.com/)
@@ -47,7 +47,7 @@ def response(resp):
search_results = loads(resp.text)
# return empty array if there are no results
- if not 'feed' in search_results:
+ if 'feed' not in search_results:
return []
feed = search_results['feed']
diff --git a/searx/plugins/search_on_category_select.py b/searx/plugins/search_on_category_select.py
index d4b725acf..a1667021d 100644
--- a/searx/plugins/search_on_category_select.py
+++ b/searx/plugins/search_on_category_select.py
@@ -16,7 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
from flask.ext.babel import gettext
name = gettext('Search on category select')
-description = gettext('Perform search immediately if a category selected. Disable to select multiple categories.')
+description = gettext('Perform search immediately if a category selected. '
+ 'Disable to select multiple categories. (JavaScript required)')
default_on = True
js_dependencies = ('js/search_on_category_select.js',)
diff --git a/searx/testing.py b/searx/testing.py
index 51c44d826..e22ecf8fe 100644
--- a/searx/testing.py
+++ b/searx/testing.py
@@ -56,9 +56,7 @@ class SearxRobotLayer(Layer):
)
def tearDown(self):
- # send TERM signal to all processes in my group, to stop subprocesses
- os.killpg(os.getpgid(self.server.pid), 15)
-
+ os.kill(self.server.pid, 15)
# remove previously set environment variable
del os.environ['SEARX_SETTINGS_PATH']
diff --git a/searx/tests/engines/test_deviantart.py b/searx/tests/engines/test_deviantart.py
index 9cf68d0b8..78a391334 100644
--- a/searx/tests/engines/test_deviantart.py
+++ b/searx/tests/engines/test_deviantart.py
@@ -75,7 +75,7 @@ class TestDeviantartEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'Title of image')
self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url')
self.assertNotIn('content', results[0])
- self.assertEqual(results[0]['thumbnail_src'], 'http://url.of.thumbnail')
+ self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')
html = """
<span class="tt-fh-tc" style="width: 202px;">
diff --git a/searx/tests/engines/test_google.py b/searx/tests/engines/test_google.py
index 2c3d8e5f6..2a90fc5ec 100644
--- a/searx/tests/engines/test_google.py
+++ b/searx/tests/engines/test_google.py
@@ -17,12 +17,13 @@ class TestGoogleEngine(SearxTestCase):
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('google.com', params['url'])
- self.assertIn('PREF', params['cookies'])
+ self.assertNotIn('PREF', params['cookies'])
self.assertIn('fr', params['headers']['Accept-Language'])
dicto['language'] = 'all'
params = google.request(query, dicto)
self.assertIn('en', params['headers']['Accept-Language'])
+ self.assertIn('PREF', params['cookies'])
def test_response(self):
self.assertRaises(AttributeError, google.response, None)
diff --git a/searx/tests/engines/test_google_images.py b/searx/tests/engines/test_google_images.py
index 32d133334..9bef692d4 100644
--- a/searx/tests/engines/test_google_images.py
+++ b/searx/tests/engines/test_google_images.py
@@ -65,7 +65,7 @@ class TestGoogleImagesEngine(SearxTestCase):
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], 'http://this.is.the.url')
- self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.url')
+ self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url')
self.assertEqual(results[0]['img_src'], 'http://image.url.jpg')
self.assertEqual(results[0]['content'], '<b>test</b>')
diff --git a/searx/utils.py b/searx/utils.py
index e6c107e24..129971e31 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -17,17 +17,16 @@ from searx import logger
logger = logger.getChild('utils')
-ua_versions = ('31.0',
- '32.0',
- '33.0',
+ua_versions = ('33.0',
'34.0',
- '35.0')
+ '35.0',
+ '36.0',
+ '37.0')
ua_os = ('Windows NT 6.3; WOW64',
'X11; Linux x86_64',
'X11; Linux x86')
-
-ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"
+ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
blocked_tags = ('script',
'style')