summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
authorMathieu Brunot <mathieu.brunot@monogramm.io>2020-01-14 15:43:43 +0100
committerGitHub <noreply@github.com>2020-01-14 15:43:43 +0100
commit24472ce71832a2b516e0c96fe73587208eb33c65 (patch)
tree830e82e0781c96068110052dafb8047fb8622565 /searx/engines
parent359c18f9e633fe4cf1062bb2e7507840065e6ca5 (diff)
parentbda189565589b0065152f5a9fba4565404f9bd9a (diff)
Merge branch 'master' into docker/opencontainers
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/bing.py5
-rw-r--r--searx/engines/flickr_noapi.py26
-rw-r--r--searx/engines/ina.py9
-rw-r--r--searx/engines/microsoft_academic.py2
-rw-r--r--searx/engines/scanr_structures.py2
-rw-r--r--searx/engines/spotify.py14
6 files changed, 42 insertions, 16 deletions
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index ed0b87dbd..b193f7c60 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -89,8 +89,7 @@ def response(resp):
'content': content})
try:
- result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]/text()'))
- result_len_container = utils.to_string(result_len_container)
+ result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
if "-" in result_len_container:
# Remove the part "from-to" for paginated request ...
result_len_container = result_len_container[result_len_container.find("-") * 2 + 2:]
@@ -102,7 +101,7 @@ def response(resp):
logger.debug('result error :\n%s', e)
pass
- if _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
+ if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
return []
results.append({'number_of_results': result_len})
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
index 198ac2cff..c8ee34f7a 100644
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -109,14 +109,22 @@ def response(resp):
else:
url = build_flickr_url(photo['ownerNsid'], photo['id'])
- results.append({'url': url,
- 'title': title,
- 'img_src': img_src,
- 'thumbnail_src': thumbnail_src,
- 'content': content,
- 'author': author,
- 'source': source,
- 'img_format': img_format,
- 'template': 'images.html'})
+ result = {
+ 'url': url,
+ 'img_src': img_src,
+ 'thumbnail_src': thumbnail_src,
+ 'source': source,
+ 'img_format': img_format,
+ 'template': 'images.html'
+ }
+ try:
+ result['author'] = author
+ result['title'] = title
+ result['content'] = content
+ except:
+ result['author'] = ''
+ result['title'] = ''
+ result['content'] = ''
+ results.append(result)
return results
diff --git a/searx/engines/ina.py b/searx/engines/ina.py
index 37a05f099..ea509649f 100644
--- a/searx/engines/ina.py
+++ b/searx/engines/ina.py
@@ -32,7 +32,7 @@ base_url = 'https://www.ina.fr'
search_url = base_url + '/layout/set/ajax/recherche/result?autopromote=&hf={ps}&b={start}&type=Video&r=&{query}'
# specific xpath variables
-results_xpath = '//div[contains(@class,"search-results--list")]/div[@class="media"]'
+results_xpath = '//div[contains(@class,"search-results--list")]//div[@class="media-body"]'
url_xpath = './/a/@href'
title_xpath = './/h3[@class="h3--title media-heading"]'
thumbnail_xpath = './/img/@src'
@@ -65,8 +65,11 @@ def response(resp):
videoid = result.xpath(url_xpath)[0]
url = base_url + videoid
title = p.unescape(extract_text(result.xpath(title_xpath)))
- thumbnail = extract_text(result.xpath(thumbnail_xpath)[0])
- if thumbnail[0] == '/':
+ try:
+ thumbnail = extract_text(result.xpath(thumbnail_xpath)[0])
+ except:
+ thumbnail = ''
+ if thumbnail and thumbnail[0] == '/':
thumbnail = base_url + thumbnail
d = extract_text(result.xpath(publishedDate_xpath)[0])
d = d.split('/')
diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py
index 9387b08d0..9bac0069c 100644
--- a/searx/engines/microsoft_academic.py
+++ b/searx/engines/microsoft_academic.py
@@ -45,6 +45,8 @@ def request(query, params):
def response(resp):
results = []
response_data = loads(resp.text)
+ if not response_data:
+ return results
for result in response_data['results']:
url = _get_url(result)
diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py
index 72fd2b3c9..7208dcb70 100644
--- a/searx/engines/scanr_structures.py
+++ b/searx/engines/scanr_structures.py
@@ -29,7 +29,7 @@ def request(query, params):
params['url'] = search_url
params['method'] = 'POST'
params['headers']['Content-type'] = "application/json"
- params['data'] = dumps({"query": query,
+ params['data'] = dumps({"query": query.decode('utf-8'),
"searchField": "ALL",
"sortDirection": "ASC",
"sortOrder": "RELEVANCY",
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py
index aed756be3..00c395706 100644
--- a/searx/engines/spotify.py
+++ b/searx/engines/spotify.py
@@ -12,10 +12,14 @@
from json import loads
from searx.url_utils import urlencode
+import requests
+import base64
# engine dependent config
categories = ['music']
paging = True
+api_client_id = None
+api_client_secret = None
# search-url
url = 'https://api.spotify.com/'
@@ -31,6 +35,16 @@ def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
+ r = requests.post(
+ 'https://accounts.spotify.com/api/token',
+ data={'grant_type': 'client_credentials'},
+ headers={'Authorization': 'Basic ' + base64.b64encode(
+ "{}:{}".format(api_client_id, api_client_secret).encode('utf-8')
+ ).decode('utf-8')}
+ )
+ j = loads(r.text)
+ params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}
+
return params