diff options
| -rw-r--r-- | searx/engines/framalibre.py | 5 | ||||
| -rw-r--r-- | searx/engines/google.py | 5 | ||||
| -rw-r--r-- | searx/engines/google_images.py | 16 | ||||
| -rw-r--r-- | searx/engines/soundcloud.py | 4 | ||||
| -rw-r--r-- | searx/settings.yml | 2 | ||||
| -rw-r--r-- | searx/webapp.py | 5 |
6 files changed, 28 insertions, 9 deletions
diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index 146cdaeec..f3441fa5f 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -10,7 +10,10 @@ @parse url, title, content, thumbnail, img_src """ -from cgi import escape +try: + from cgi import escape +except: + from html import escape from lxml import html from searx.engines.xpath import extract_text from searx.url_utils import urljoin, urlencode diff --git a/searx/engines/google.py b/searx/engines/google.py index 03f0523e7..0a0d6ccb1 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -199,8 +199,9 @@ def request(query, params): params['headers']['Accept-Language'] = language + ',' + language + '-' + country params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' - # Force Internet Explorer 12 user agent to avoid loading the new UI that Searx can't parse - params['headers']['User-Agent'] = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)" + # Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse + params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)" + "AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1") params['google_hostname'] = google_hostname diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index d9a49e9cc..636913114 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -70,11 +70,21 @@ def response(resp): try: metadata = loads(result) - img_format = "{0} {1}x{2}".format(metadata['ity'], str(metadata['ow']), str(metadata['oh'])) - source = "{0} ({1})".format(metadata['st'], metadata['isu']) + + img_format = metadata.get('ity', '') + img_width = metadata.get('ow', '') + img_height = metadata.get('oh', '') + if img_width and img_height: + img_format += " {0}x{1}".format(img_width, img_height) + + source = metadata.get('st', '') + source_url = metadata.get('isu', '') + if source_url: + source += " ({0})".format(source_url) + results.append({'url': metadata['ru'], 'title': metadata['pt'], - 'content': metadata['s'], + 'content': metadata.get('s', ''), 'source': source, 'img_format': img_format, 'thumbnail_src': metadata['tu'], diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 870998545..284689bf6 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -51,7 +51,9 @@ def get_client_id(): if response.ok: tree = html.fromstring(response.content) - script_tags = tree.xpath("//script[contains(@src, '/assets/app')]") + # script_tags has been moved from /assets/app/ to /assets/ path. I + # found client_id in https://a-v2.sndcdn.com/assets/49-a0c01933-3.js + script_tags = tree.xpath("//script[contains(@src, '/assets/')]") app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] # extracts valid app_js urls from soundcloud.com content diff --git a/searx/settings.yml b/searx/settings.yml index 25d90d4db..c6f805331 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -744,7 +744,7 @@ engines: title_xpath : ./h2 content_xpath : ./p[@class="s"] suggestion_xpath : /html/body//div[@class="top-info"]/p[@class="top-info spell"]/a - first_page_num : 1 + first_page_num : 0 page_size : 10 disabled : True diff --git a/searx/webapp.py b/searx/webapp.py index 3bb29140a..183bf1975 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -41,7 +41,10 @@ except: logger.critical("cannot import dependency: pygments") from sys import exit exit(1) -from cgi import escape +try: + from cgi import escape +except: + from html import escape from datetime import datetime, timedelta from time import time from werkzeug.contrib.fixers import ProxyFix |