diff options
Diffstat (limited to 'searx')
46 files changed, 450 insertions, 366 deletions
diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 83e204890..f5775bc63 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -28,7 +28,7 @@ from searx.poolrequests import get as http_get def get(*args, **kwargs): - if not 'timeout' in kwargs: + if 'timeout' not in kwargs: kwargs['timeout'] = settings['server']['request_timeout'] return http_get(*args, **kwargs) @@ -111,7 +111,7 @@ def searx_bang(full_query): def dbpedia(query): - # dbpedia autocompleter + # dbpedia autocompleter, no HTTPS autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' # noqa response = get(autocomplete_url @@ -139,7 +139,7 @@ def duckduckgo(query): def google(query): # google autocompleter - autocomplete_url = 'http://suggestqueries.google.com/complete/search?client=toolbar&' # noqa + autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&' # noqa response = get(autocomplete_url + urlencode(dict(q=query))) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 21a307501..18a45d851 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -86,7 +86,7 @@ def load_engine(engine_data): continue if getattr(engine, engine_attr) is None: logger.error('Missing engine config attribute: "{0}.{1}"' - .format(engine.name, engine_attr)) + .format(engine.name, engine_attr)) sys.exit(1) engine.stats = { @@ -106,7 +106,7 @@ def load_engine(engine_data): if engine.shortcut: if engine.shortcut in engine_shortcuts: logger.error('Engine config error: ambigious shortcut: {0}' - .format(engine.shortcut)) + .format(engine.shortcut)) sys.exit(1) engine_shortcuts[engine.shortcut] = engine.name return engine diff --git a/searx/engines/bing.py b/searx/engines/bing.py index f9c323d05..c72e6aeff 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -1,15 +1,17 @@ -## Bing (Web) -# -# @website https://www.bing.com -# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), -# max. 5000 query/month -# -# @using-api no (because of query limit) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content -# -# @todo publishedDate +""" + Bing (Web) + + @website https://www.bing.com + @provide-api yes (http://datamarket.azure.com/dataset/bing/search), + max. 5000 query/month + + @using-api no (because of query limit) + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content + + @todo publishedDate +""" from urllib import urlencode from cgi import escape diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index d4e3d2b2b..b06a57edc 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -1,17 +1,19 @@ -## Bing (Images) -# -# @website https://www.bing.com/images -# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), -# max. 5000 query/month -# -# @using-api no (because of query limit) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, img_src -# -# @todo currently there are up to 35 images receive per page, -# because bing does not parse count=10. -# limited response to 10 images +""" + Bing (Images) + + @website https://www.bing.com/images + @provide-api yes (http://datamarket.azure.com/dataset/bing/search), + max. 5000 query/month + + @using-api no (because of query limit) + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, img_src + + @todo currently there are up to 35 images receive per page, + because bing does not parse count=10. + limited response to 10 images +""" from urllib import urlencode from lxml import html @@ -76,7 +78,7 @@ def response(resp): title = link.attrib.get('t1') ihk = link.attrib.get('ihk') - #url = 'http://' + link.attrib.get('t3') + # url = 'http://' + link.attrib.get('t3') url = yaml_data.get('surl') img_src = yaml_data.get('imgurl') diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index e6adb2644..1e5d361c1 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -1,13 +1,15 @@ -## Bing (News) -# -# @website https://www.bing.com/news -# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), -# max. 5000 query/month -# -# @using-api no (because of query limit) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content, publishedDate +""" + Bing (News) + + @website https://www.bing.com/news + @provide-api yes (http://datamarket.azure.com/dataset/bing/search), + max. 5000 query/month + + @using-api no (because of query limit) + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, publishedDate +""" from urllib import urlencode from cgi import escape @@ -87,6 +89,8 @@ def response(resp): publishedDate = parser.parse(publishedDate, dayfirst=False) except TypeError: publishedDate = datetime.now() + except ValueError: + publishedDate = datetime.now() # append result results.append({'url': url, diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py index 2e7ec904f..93ac6616b 100644 --- a/searx/engines/blekko_images.py +++ b/searx/engines/blekko_images.py @@ -1,12 +1,14 @@ -## Blekko (Images) -# -# @website https://blekko.com -# @provide-api yes (inofficial) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, img_src +""" + Blekko (Images) + + @website https://blekko.com + @provide-api yes (inofficial) + + @using-api yes + @results JSON + @stable yes + @parse url, title, img_src +""" from json import loads from urllib import urlencode diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 944250628..bde866146 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -1,12 +1,14 @@ -## BTDigg (Videos, Music, Files) -# -# @website https://btdigg.org -# @provide-api yes (on demand) -# -# @using-api no -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content, seed, leech, magnetlink +""" + BTDigg (Videos, Music, Files) + + @website https://btdigg.org + @provide-api yes (on demand) + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, seed, leech, magnetlink +""" from urlparse import urljoin from cgi import escape diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 03b1dbb8b..4eb894725 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -1,14 +1,16 @@ -## Dailymotion (Videos) -# -# @website https://www.dailymotion.com -# @provide-api yes (http://www.dailymotion.com/developer) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, thumbnail, publishedDate, embedded -# -# @todo set content-parameter with correct data +""" + Dailymotion (Videos) + + @website https://www.dailymotion.com + @provide-api yes (http://www.dailymotion.com/developer) + + @using-api yes + @results JSON + @stable yes + @parse url, title, thumbnail, publishedDate, embedded + + @todo set content-parameter with correct data +""" from urllib import urlencode from json import loads @@ -48,7 +50,7 @@ def response(resp): search_res = loads(resp.text) # return empty array if there are no results - if not 'list' in search_res: + if 'list' not in search_res: return [] # parse results @@ -60,6 +62,9 @@ def response(resp): publishedDate = datetime.fromtimestamp(res['created_time'], None) embedded = embedded_url.format(videoid=res['id']) + # http to https + thumbnail = thumbnail.replace("http://", "https://") + results.append({'template': 'videos.html', 'url': url, 'title': title, diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 7fbd3c200..0530bc072 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -1,12 +1,14 @@ -## Deezer (Music) -# -# @website https://deezer.com -# @provide-api yes (http://developers.deezer.com/api/) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, embedded +""" + Deezer (Music) + + @website https://deezer.com + @provide-api yes (http://developers.deezer.com/api/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, embedded +""" from json import loads from urllib import urlencode diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index 4198e8c76..60c8d7ea7 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -1,14 +1,16 @@ -## Deviantart (Images) -# -# @website https://www.deviantart.com/ -# @provide-api yes (https://www.deviantart.com/developers/) (RSS) -# -# @using-api no (TODO, rewrite to api) -# @results HTML -# @stable no (HTML can change) -# @parse url, title, thumbnail_src, img_src -# -# @todo rewrite to api +""" + Deviantart (Images) + + @website https://www.deviantart.com/ + @provide-api yes (https://www.deviantart.com/developers/) (RSS) + + @using-api no (TODO, rewrite to api) + @results HTML + @stable no (HTML can change) + @parse url, title, thumbnail_src, img_src + + @todo rewrite to api +""" from urllib import urlencode from urlparse import urljoin @@ -22,7 +24,7 @@ paging = True # search-url base_url = 'https://www.deviantart.com/' -search_url = base_url+'search?offset={offset}&{query}' +search_url = base_url+'browse/all/?offset={offset}&{query}' # do search-request @@ -56,6 +58,12 @@ def response(resp): thumbnail_src = link.xpath('.//img')[0].attrib.get('src') img_src = regex.sub('/', thumbnail_src) + # http to https, remove domain sharding + thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src) + thumbnail_src = re.sub(r"http://", "https://", thumbnail_src) + + url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url) + # append result results.append({'url': url, 'title': title, diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 1b5f2c8e4..000f66ba2 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -1,12 +1,14 @@ -## Digg (News, Social media) -# -# @website https://digg.com/ -# @provide-api no -# -# @using-api no -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content, publishedDate, thumbnail +""" + Digg (News, Social media) + + @website https://digg.com/ + @provide-api no + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, publishedDate, thumbnail +""" from urllib import quote_plus from json import loads @@ -58,6 +60,9 @@ def response(resp): pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime') publishedDate = parser.parse(pubdate) + # http to https + thumbnail = thumbnail.replace("http://static.digg.com", "https://static.digg.com") + # append result results.append({'url': url, 'title': title, diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index e35a6334c..4ac2099ae 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -1,17 +1,19 @@ -## DuckDuckGo (Web) -# -# @website https://duckduckgo.com/ -# @provide-api yes (https://duckduckgo.com/api), -# but not all results from search-site -# -# @using-api no -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content -# -# @todo rewrite to api -# @todo language support -# (the current used site does not support language-change) +""" + DuckDuckGo (Web) + + @website https://duckduckgo.com/ + @provide-api yes (https://duckduckgo.com/api), + but not all results from search-site + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content + + @todo rewrite to api + @todo language support + (the current used site does not support language-change) +""" from urllib import urlencode from lxml.html import fromstring diff --git a/searx/engines/dummy.py b/searx/engines/dummy.py index c60b7a5d2..50b56ef78 100644 --- a/searx/engines/dummy.py +++ b/searx/engines/dummy.py @@ -1,7 +1,9 @@ -## Dummy -# -# @results empty array -# @stable yes +""" + Dummy + + @results empty array + @stable yes +""" # do search-request diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py index 4a5e60a60..43df14eef 100644 --- a/searx/engines/faroo.py +++ b/searx/engines/faroo.py @@ -1,12 +1,14 @@ -## Faroo (Web, News) -# -# @website http://www.faroo.com -# @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, publishedDate, img_src +""" + Faroo (Web, News) + + @website http://www.faroo.com + @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, publishedDate, img_src +""" from urllib import urlencode from json import loads diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 4040236e1..68d45bc17 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -1,15 +1,17 @@ #!/usr/bin/env python -## Flickr (Images) -# -# @website https://www.flickr.com -# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, thumbnail, img_src -#More info on api-key : https://www.flickr.com/services/apps/create/ +""" + Flickr (Images) + + @website https://www.flickr.com + @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) + + @using-api yes + @results JSON + @stable yes + @parse url, title, thumbnail, img_src + More info on api-key : https://www.flickr.com/services/apps/create/ +""" from urllib import urlencode from json import loads @@ -48,10 +50,10 @@ def response(resp): search_results = loads(resp.text) # return empty array if there are no results - if not 'photos' in search_results: + if 'photos' not in search_results: return [] - if not 'photo' in search_results['photos']: + if 'photo' not in search_results['photos']: return [] photos = search_results['photos']['photo'] diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 3a83fdc65..2071b8e36 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -1,14 +1,16 @@ #!/usr/bin/env python -# Flickr (Images) -# -# @website https://www.flickr.com -# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) -# -# @using-api no -# @results HTML -# @stable no -# @parse url, title, thumbnail, img_src +""" + Flickr (Images) + + @website https://www.flickr.com + @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) + + @using-api no + @results HTML + @stable no + @parse url, title, thumbnail, img_src +""" from urllib import urlencode from json import loads @@ -20,8 +22,8 @@ logger = logger.getChild('flickr-noapi') categories = ['images'] -url = 'https://secure.flickr.com/' -search_url = url + 'search/?{query}&page={page}' +url = 'https://www.flickr.com/' +search_url = url + 'search?{query}&page={page}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') diff --git a/searx/engines/generalfile.py b/searx/engines/generalfile.py index b7d716298..3bb27444f 100644 --- a/searx/engines/generalfile.py +++ b/searx/engines/generalfile.py @@ -1,14 +1,16 @@ -## General Files (Files) -# -# @website http://www.general-files.org -# @provide-api no (nothing found) -# -# @using-api no (because nothing found) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content -# -# @todo detect torrents? +""" + General Files (Files) + + @website http://www.general-files.org + @provide-api no (nothing found) + + @using-api no (because nothing found) + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content + + @todo detect torrents? +""" from lxml import html diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 8749c3256..b852de9ba 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -1,12 +1,14 @@ -## Gigablast (Web) -# -# @website http://gigablast.com -# @provide-api yes (http://gigablast.com/api.html) -# -# @using-api yes -# @results XML -# @stable yes -# @parse url, title, content +""" + Gigablast (Web) + + @website http://gigablast.com + @provide-api yes (http://gigablast.com/api.html) + + @using-api yes + @results XML + @stable yes + @parse url, title, content +""" from urllib import urlencode from cgi import escape @@ -17,7 +19,7 @@ categories = ['general'] paging = True number_of_results = 5 -# search-url +# search-url, invalid HTTPS certificate base_url = 'http://gigablast.com/' search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0' diff --git a/searx/engines/github.py b/searx/engines/github.py index a68aed141..cc1fc470c 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -1,12 +1,14 @@ -## Github (It) -# -# @website https://github.com/ -# @provide-api yes (https://developer.github.com/v3/) -# -# @using-api yes -# @results JSON -# @stable yes (using api) -# @parse url, title, content +""" + Github (It) + + @website https://github.com/ + @provide-api yes (https://developer.github.com/v3/) + + @using-api yes + @results JSON + @stable yes (using api) + @parse url, title, content +""" from urllib import urlencode from json import loads @@ -37,7 +39,7 @@ def response(resp): search_res = loads(resp.text) # check if items are recieved - if not 'items' in search_res: + if 'items' not in search_res: return [] # parse results diff --git a/searx/engines/google.py b/searx/engines/google.py index 9c768260a..807c58ed5 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -76,7 +76,8 @@ def request(query, params): query=urlencode({'q': query})) params['headers']['Accept-Language'] = language - params['cookies']['PREF'] = get_google_pref_cookie() + if language.startswith('en'): + params['cookies']['PREF'] = get_google_pref_cookie() return params diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 1c0e62f5c..85963a16f 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,13 +1,15 @@ -## Google (Images) -# -# @website https://www.google.com -# @provide-api yes (https://developers.google.com/web-search/docs/), -# deprecated! -# -# @using-api yes -# @results JSON -# @stable yes (but deprecated) -# @parse url, title, img_src +""" + Google (Images) + + @website https://www.google.com + @provide-api yes (https://developers.google.com/web-search/docs/), + deprecated! + + @using-api yes + @results JSON + @stable yes (but deprecated) + @parse url, title, img_src +""" from urllib import urlencode, unquote from json import loads @@ -56,6 +58,9 @@ def response(resp): continue thumbnail_src = result['tbUrl'] + # http to https + thumbnail_src = thumbnail_src.replace("http://", "https://") + # append result results.append({'url': href, 'title': title, diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 3e4371b99..95d15cfb9 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -1,13 +1,15 @@ -## Google (News) -# -# @website https://www.google.com -# @provide-api yes (https://developers.google.com/web-search/docs/), -# deprecated! -# -# @using-api yes -# @results JSON -# @stable yes (but deprecated) -# @parse url, title, content, publishedDate +""" + Google (News) + + @website https://www.google.com + @provide-api yes (https://developers.google.com/web-search/docs/), + deprecated! + + @using-api yes + @results JSON + @stable yes (but deprecated) + @parse url, title, content, publishedDate +""" from urllib import urlencode from json import loads diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 708b999f8..5525b7f7e 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -6,7 +6,7 @@ search_url = None url_query = None content_query = None title_query = None -#suggestion_xpath = '' +# suggestion_xpath = '' def iterate(iterable): diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index 9c4639c32..4c5d24008 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -1,12 +1,14 @@ -## Kickass Torrent (Videos, Music, Files) -# -# @website https://kickass.so -# @provide-api no (nothing found) -# -# @using-api no -# @results HTML (using search portal) -# @stable yes (HTML can change) -# @parse url, title, content, seed, leech, magnetlink +""" + Kickass Torrent (Videos, Music, Files) + + @website https://kickass.so + @provide-api no (nothing found) + + @using-api no + @results HTML (using search portal) + @stable yes (HTML can change) + @parse url, title, content, seed, leech, magnetlink +""" from urlparse import urljoin from cgi import escape diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 8ca32c62a..9fb72e830 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -1,14 +1,16 @@ -## general mediawiki-engine (Web) -# -# @website websites built on mediawiki (https://www.mediawiki.org) -# @provide-api yes (http://www.mediawiki.org/wiki/API:Search) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title -# -# @todo content +""" + general mediawiki-engine (Web) + + @website websites built on mediawiki (https://www.mediawiki.org) + @provide-api yes (http://www.mediawiki.org/wiki/API:Search) + + @using-api yes + @results JSON + @stable yes + @parse url, title + + @todo content +""" from json import loads from string import Formatter diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py index 918ae2908..312d297eb 100644 --- a/searx/engines/mixcloud.py +++ b/searx/engines/mixcloud.py @@ -1,12 +1,14 @@ -## Mixcloud (Music) -# -# @website https://http://www.mixcloud.com/ -# @provide-api yes (http://www.mixcloud.com/developers/ -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, embedded, publishedDate +""" + Mixcloud (Music) + + @website https://http://www.mixcloud.com/ + @provide-api yes (http://www.mixcloud.com/developers/ + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, embedded, publishedDate +""" from json import loads from urllib import urlencode diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 60c3c13ca..38baaada9 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -1,12 +1,14 @@ -## OpenStreetMap (Map) -# -# @website https://openstreetmap.org/ -# @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title +""" + OpenStreetMap (Map) + + @website https://openstreetmap.org/ + @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim) + + @using-api yes + @results JSON + @stable yes + @parse url, title +""" from json import loads from searx.utils import searx_useragent diff --git a/searx/engines/photon.py b/searx/engines/photon.py index 869916cd4..2197005e5 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -1,12 +1,14 @@ -## Photon (Map) -# -# @website https://photon.komoot.de -# @provide-api yes (https://photon.komoot.de/) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title +""" + Photon (Map) + + @website https://photon.komoot.de + @provide-api yes (https://photon.komoot.de/) + + @using-api yes + @results JSON + @stable yes + @parse url, title +""" from urllib import urlencode from json import loads diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index f276697b1..21d9c4ac2 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -1,12 +1,14 @@ -## Searchcode (It) -# -# @website https://searchcode.com/ -# @provide-api yes (https://searchcode.com/api/) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content +""" + Searchcode (It) + + @website https://searchcode.com/ + @provide-api yes (https://searchcode.com/api/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" from urllib import urlencode from json import loads diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py index 76da8d752..582b98d79 100644 --- a/searx/engines/searchcode_doc.py +++ b/searx/engines/searchcode_doc.py @@ -1,12 +1,14 @@ -## Searchcode (It) -# -# @website https://searchcode.com/ -# @provide-api yes (https://searchcode.com/api/) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content +""" + Searchcode (It) + + @website https://searchcode.com/ + @provide-api yes (https://searchcode.com/api/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" from urllib import urlencode from json import loads diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 44374af6f..46e17fc81 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -1,12 +1,14 @@ -## Soundcloud (Music) -# -# @website https://soundcloud.com -# @provide-api yes (https://developers.soundcloud.com/) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, publishedDate, embedded +""" + Soundcloud (Music) + + @website https://soundcloud.com + @provide-api yes (https://developers.soundcloud.com/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, publishedDate, embedded +""" from json import loads from urllib import urlencode, quote_plus diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 61f3721ec..f75796e83 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -1,12 +1,14 @@ -## Spotify (Music) -# -# @website https://spotify.com -# @provide-api yes (https://developer.spotify.com/web-api/search-item/) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, embedded +""" + Spotify (Music) + + @website https://spotify.com + @provide-api yes (https://developer.spotify.com/web-api/search-item/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, embedded +""" from json import loads from urllib import urlencode diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index 95ab58c54..34ecabae7 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -1,12 +1,14 @@ -## Stackoverflow (It) -# -# @website https://stackoverflow.com/ -# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search) -# -# @using-api no -# @results HTML -# @stable no (HTML can change) -# @parse url, title, content +""" + Stackoverflow (It) + + @website https://stackoverflow.com/ + @provide-api not clear (https://api.stackexchange.com/docs/advanced-search) + + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content +""" from urlparse import urljoin from cgi import escape diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index acefe30ea..47d27d0b2 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -1,12 +1,14 @@ -## Subtitleseeker (Video) -# -# @website http://www.subtitleseeker.com -# @provide-api no -# -# @using-api no -# @results HTML -# @stable no (HTML can change) -# @parse url, title, content +""" + Subtitleseeker (Video) + + @website http://www.subtitleseeker.com + @provide-api no + + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content +""" from cgi import escape from urllib import quote_plus diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index 0e35e6188..a0ee18a47 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -1,14 +1,16 @@ -## Twitter (Social media) -# -# @website https://twitter.com/ -# @provide-api yes (https://dev.twitter.com/docs/using-search) -# -# @using-api no -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content -# -# @todo publishedDate +""" + Twitter (Social media) + + @website https://twitter.com/ + @provide-api yes (https://dev.twitter.com/docs/using-search) + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content + + @todo publishedDate +""" from urlparse import urljoin from urllib import urlencode @@ -27,8 +29,8 @@ search_url = base_url + 'search?' # specific xpath variables results_xpath = '//li[@data-item-type="tweet"]' link_xpath = './/small[@class="time"]//a' -title_xpath = './/span[@class="username js-action-profile-name"]' -content_xpath = './/p[@class="js-tweet-text tweet-text"]' +title_xpath = './/span[contains(@class, "username")]' +content_xpath = './/p[contains(@class, "tweet-text")]' timestamp_xpath = './/span[contains(@class,"_timestamp")]' diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index a68c105ce..12868ad22 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -1,13 +1,14 @@ -## 1x (Images) -# -# @website http://1x.com/ -# @provide-api no -# -# @using-api no -# @results HTML -# @stable no (HTML can change) -# @parse url, title, thumbnail, img_src, content +""" + 1x (Images) + @website http://1x.com/ + @provide-api no + + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, thumbnail, img_src, content +""" from urllib import urlencode from urlparse import urljoin @@ -19,7 +20,7 @@ import re categories = ['images'] paging = False -# search-url +# search-url, no HTTPS (there is a valid certificate for https://api2.1x.com/ ) base_url = 'http://1x.com' search_url = base_url+'/backend/search.php?{query}' diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py index 99dba4abf..c98e19443 100644 --- a/searx/engines/www500px.py +++ b/searx/engines/www500px.py @@ -1,14 +1,16 @@ -## 500px (Images) -# -# @website https://500px.com -# @provide-api yes (https://developers.500px.com/) -# -# @using-api no -# @results HTML -# @stable no (HTML can change) -# @parse url, title, thumbnail, img_src, content -# -# @todo rewrite to api +""" + 500px (Images) + + @website https://500px.com + @provide-api yes (https://developers.500px.com/) + + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, thumbnail, img_src, content + + @todo rewrite to api +""" from urllib import urlencode diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 3d26c9cc4..c2f1bc7ef 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -1,4 +1,4 @@ -## Yacy (Web, Images, Videos, Music, Files) +# Yacy (Web, Images, Videos, Music, Files) # # @website http://yacy.net # @provide-api yes diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 11663a415..769e7e47f 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -1,13 +1,15 @@ -## Yahoo (Web) -# -# @website https://search.yahoo.com/web -# @provide-api yes (https://developer.yahoo.com/boss/search/), -# $0.80/1000 queries -# -# @using-api no (because pricing) -# @results HTML (using search portal) -# @stable no (HTML can change) -# @parse url, title, content, suggestion +""" + Yahoo (Web) + + @website https://search.yahoo.com/web + @provide-api yes (https://developer.yahoo.com/boss/search/), + $0.80/1000 queries + + @using-api no (because pricing) + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, suggestion +""" from urllib import urlencode from urlparse import unquote diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py index 1375538a8..c77cd2d0e 100644 --- a/searx/engines/youtube.py +++ b/searx/engines/youtube.py @@ -1,4 +1,4 @@ -## Youtube (Videos) +# Youtube (Videos) # # @website https://www.youtube.com/ # @provide-api yes (http://gdata-samples-youtube-search-py.appspot.com/) @@ -47,7 +47,7 @@ def response(resp): search_results = loads(resp.text) # return empty array if there are no results - if not 'feed' in search_results: + if 'feed' not in search_results: return [] feed = search_results['feed'] diff --git a/searx/plugins/search_on_category_select.py b/searx/plugins/search_on_category_select.py index d4b725acf..a1667021d 100644 --- a/searx/plugins/search_on_category_select.py +++ b/searx/plugins/search_on_category_select.py @@ -16,7 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' from flask.ext.babel import gettext name = gettext('Search on category select') -description = gettext('Perform search immediately if a category selected. Disable to select multiple categories.') +description = gettext('Perform search immediately if a category selected. ' + 'Disable to select multiple categories. (JavaScript required)') default_on = True js_dependencies = ('js/search_on_category_select.js',) diff --git a/searx/testing.py b/searx/testing.py index 51c44d826..e22ecf8fe 100644 --- a/searx/testing.py +++ b/searx/testing.py @@ -56,9 +56,7 @@ class SearxRobotLayer(Layer): ) def tearDown(self): - # send TERM signal to all processes in my group, to stop subprocesses - os.killpg(os.getpgid(self.server.pid), 15) - + os.kill(self.server.pid, 15) # remove previously set environment variable del os.environ['SEARX_SETTINGS_PATH'] diff --git a/searx/tests/engines/test_deviantart.py b/searx/tests/engines/test_deviantart.py index 9cf68d0b8..78a391334 100644 --- a/searx/tests/engines/test_deviantart.py +++ b/searx/tests/engines/test_deviantart.py @@ -75,7 +75,7 @@ class TestDeviantartEngine(SearxTestCase): self.assertEqual(results[0]['title'], 'Title of image') self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url') self.assertNotIn('content', results[0]) - self.assertEqual(results[0]['thumbnail_src'], 'http://url.of.thumbnail') + self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail') html = """ <span class="tt-fh-tc" style="width: 202px;"> diff --git a/searx/tests/engines/test_google.py b/searx/tests/engines/test_google.py index 2c3d8e5f6..2a90fc5ec 100644 --- a/searx/tests/engines/test_google.py +++ b/searx/tests/engines/test_google.py @@ -17,12 +17,13 @@ class TestGoogleEngine(SearxTestCase): self.assertIn('url', params) self.assertIn(query, params['url']) self.assertIn('google.com', params['url']) - self.assertIn('PREF', params['cookies']) + self.assertNotIn('PREF', params['cookies']) self.assertIn('fr', params['headers']['Accept-Language']) dicto['language'] = 'all' params = google.request(query, dicto) self.assertIn('en', params['headers']['Accept-Language']) + self.assertIn('PREF', params['cookies']) def test_response(self): self.assertRaises(AttributeError, google.response, None) diff --git a/searx/tests/engines/test_google_images.py b/searx/tests/engines/test_google_images.py index 32d133334..9bef692d4 100644 --- a/searx/tests/engines/test_google_images.py +++ b/searx/tests/engines/test_google_images.py @@ -65,7 +65,7 @@ class TestGoogleImagesEngine(SearxTestCase): self.assertEqual(len(results), 1) self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['url'], 'http://this.is.the.url') - self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.url') + self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url') self.assertEqual(results[0]['img_src'], 'http://image.url.jpg') self.assertEqual(results[0]['content'], '<b>test</b>') diff --git a/searx/utils.py b/searx/utils.py index e6c107e24..129971e31 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -17,17 +17,16 @@ from searx import logger logger = logger.getChild('utils') -ua_versions = ('31.0', - '32.0', - '33.0', +ua_versions = ('33.0', '34.0', - '35.0') + '35.0', + '36.0', + '37.0') ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64', 'X11; Linux x86') - -ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}" +ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}" blocked_tags = ('script', 'style') |