From 52e615dede8538c36f569d2cf07835427a9a0db6 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Wed, 30 Nov 2016 18:43:03 +0100 Subject: [enh] py3 compatibility --- searx/engines/1337x.py | 3 +-- searx/engines/__init__.py | 5 ++--- searx/engines/archlinux.py | 3 +-- searx/engines/base.py | 6 +++--- searx/engines/bing.py | 2 +- searx/engines/bing_images.py | 2 +- searx/engines/bing_news.py | 5 ++--- searx/engines/blekko_images.py | 2 +- searx/engines/btdigg.py | 5 ++--- searx/engines/currency_convert.py | 14 +++++++++----- searx/engines/dailymotion.py | 3 +-- searx/engines/deezer.py | 5 ++--- searx/engines/deviantart.py | 2 +- searx/engines/dictzone.py | 6 +++--- searx/engines/digbt.py | 8 ++++++-- searx/engines/digg.py | 4 ++-- searx/engines/doku.py | 2 +- searx/engines/duckduckgo.py | 2 +- searx/engines/duckduckgo_definitions.py | 6 +++--- searx/engines/faroo.py | 2 +- searx/engines/fdroid.py | 7 +++---- searx/engines/filecrop.py | 11 +++++++---- searx/engines/flickr.py | 2 +- searx/engines/flickr_noapi.py | 2 +- searx/engines/framalibre.py | 4 +--- searx/engines/frinkiac.py | 2 +- searx/engines/gigablast.py | 3 +-- searx/engines/github.py | 2 +- searx/engines/google.py | 5 ++--- searx/engines/google_images.py | 2 +- searx/engines/google_news.py | 3 +-- searx/engines/ina.py | 10 +++++++--- searx/engines/json_engine.py | 11 ++++++++--- searx/engines/kickass.py | 3 +-- searx/engines/mediawiki.py | 2 +- searx/engines/mixcloud.py | 2 +- searx/engines/nyaa.py | 2 +- searx/engines/openstreetmap.py | 4 ---- searx/engines/photon.py | 2 +- searx/engines/piratebay.py | 3 +-- searx/engines/qwant.py | 3 +-- searx/engines/reddit.py | 6 ++---- searx/engines/scanr_structures.py | 4 +--- searx/engines/searchcode_code.py | 5 ++--- searx/engines/searchcode_doc.py | 5 ++--- searx/engines/seedpeer.py | 4 +--- searx/engines/soundcloud.py | 19 ++++++++++++------- searx/engines/spotify.py | 5 ++--- searx/engines/stackoverflow.py | 6 ++---- searx/engines/startpage.py | 2 +- searx/engines/subtitleseeker.py | 2 +- searx/engines/swisscows.py | 27 +++++++++++++-------------- searx/engines/tokyotoshokan.py | 11 +++++------ searx/engines/torrentz.py | 8 ++++---- searx/engines/translated.py | 4 ++++ searx/engines/twitter.py | 3 +-- searx/engines/vimeo.py | 2 +- searx/engines/wikidata.py | 13 +++++-------- searx/engines/wikipedia.py | 21 +++++++++------------ searx/engines/wolframalpha_api.py | 13 ++++++------- searx/engines/wolframalpha_noapi.py | 9 ++++----- searx/engines/www1x.py | 6 ++---- searx/engines/www500px.py | 3 +-- searx/engines/xpath.py | 4 ++-- searx/engines/yacy.py | 2 +- searx/engines/yahoo.py | 3 +-- searx/engines/yahoo_news.py | 6 +++--- searx/engines/yandex.py | 4 ++-- searx/engines/youtube_api.py | 2 +- searx/engines/youtube_noapi.py | 2 +- 70 files changed, 178 insertions(+), 195 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index c6bc3cb6d..0de04bd95 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -1,8 +1,7 @@ -from urllib import quote from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size -from urlparse import urljoin +from searx.url_utils import quote, urljoin url = 'https://1337x.to/' search_url = url + 'search/{search_term}/{pageno}/' diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 77184a282..023ec409a 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -72,12 +72,11 @@ def load_engine(engine_data): if engine_data['categories'] == 'none': engine.categories = [] else: - engine.categories = map( - str.strip, engine_data['categories'].split(',')) + engine.categories = list(map(str.strip, engine_data['categories'].split(','))) continue setattr(engine, param_name, engine_data[param_name]) - for arg_name, arg_value in engine_default_args.iteritems(): + for arg_name, arg_value in engine_default_args.items(): if not hasattr(engine, arg_name): setattr(engine, arg_name, arg_value) diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index dca825790..cad06f8c6 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -11,10 +11,9 @@ @parse url, title """ -from urlparse import urljoin -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text +from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] diff --git a/searx/engines/base.py b/searx/engines/base.py index a552453ce..ff006a3bc 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -14,10 +14,10 @@ """ from lxml import etree -from urllib import urlencode -from searx.utils import searx_useragent from datetime import datetime import re +from searx.url_utils import urlencode +from searx.utils import searx_useragent categories = ['science'] @@ -73,7 +73,7 @@ def request(query, params): def response(resp): results = [] - search_results = etree.XML(resp.content) + search_results = etree.XML(resp.text) for entry in search_results.xpath('./result/doc'): content = "No description available" diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 4e7ead82d..052d567ea 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -13,9 +13,9 @@ @todo publishedDate """ -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['general'] diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 97f6dca37..e79740e50 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -15,11 +15,11 @@ limited response to 10 images """ -from urllib import urlencode from lxml import html from json import loads import re from searx.engines.bing import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode # engine dependent config categories = ['images'] diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 765bcd38e..8e3cc517e 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -11,13 +11,12 @@ @parse url, title, content, publishedDate, thumbnail """ -from urllib import urlencode -from urlparse import urlparse, parse_qsl from datetime import datetime from dateutil import parser from lxml import etree from searx.utils import list_get from searx.engines.bing import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode, urlparse, parse_qsl # engine dependent config categories = ['news'] @@ -86,7 +85,7 @@ def request(query, params): def response(resp): results = [] - rss = etree.fromstring(resp.content) + rss = etree.fromstring(resp.text) ns = rss.nsmap diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py index c0664f390..f71645634 100644 --- a/searx/engines/blekko_images.py +++ b/searx/engines/blekko_images.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['images'] diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 33c8355de..40438673f 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -10,11 +10,10 @@ @parse url, title, content, seed, leech, magnetlink """ -from urlparse import urljoin -from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text +from searx.url_utils import quote, urljoin from searx.utils import get_torrent_size # engine dependent config @@ -38,7 +37,7 @@ def request(query, params): def response(resp): results = [] - dom = html.fromstring(resp.content) + dom = html.fromstring(resp.text) search_res = dom.xpath('//div[@id="search_res"]/table/tr') diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index bc839cfb5..1218d4849 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,21 +1,25 @@ -from datetime import datetime +import json import re import os -import json +import sys import unicodedata +from datetime import datetime + +if sys.version_info[0] == 3: + unicode = str categories = [] url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' weight = 100 -parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa +parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) db = 1 def normalize_name(name): - name = name.lower().replace('-', ' ').rstrip('s') + name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() @@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language): def request(query, params): - m = parser_re.match(unicode(query, 'utf8')) + m = parser_re.match(query) if not m: # wrong query return params diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 8c69aafe0..fad7e596c 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -12,10 +12,9 @@ @todo set content-parameter with correct data """ -from urllib import urlencode from json import loads from datetime import datetime -from requests import get +from searx.url_utils import urlencode # engine dependent config categories = ['videos'] diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 3db1af3d2..af63478fb 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['music'] @@ -30,8 +30,7 @@ embedded_url = '' +cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U) + def get_client_id(): response = http_get("https://soundcloud.com") - rx_namespace = {"re": "http://exslt.org/regular-expressions"} if response.ok: - tree = etree.parse(StringIO(response.content), etree.HTMLParser()) - script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) + tree = html.fromstring(response.content) + script_tags = tree.xpath("//script[contains(@src, '/assets/app')]") app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] # extracts valid app_js urls from soundcloud.com content @@ -51,7 +56,7 @@ def get_client_id(): # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: - cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) + cids = cid_re.search(response.text) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 249ba91ef..aed756be3 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['music'] @@ -29,8 +29,7 @@ embedded_url = '