summary refs log tree commit diff
path: root/searx/engines
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/google.py 5
-rw-r--r-- searx/engines/google_images.py 61
2 files changed, 34 insertions, 32 deletions
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 67e6ebb87..b5e12e7cd 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -202,7 +202,10 @@ def request(query, params):
params['headers']['Accept-Language'] = language
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
if google_hostname == default_hostname:
- params['cookies']['PREF'] = get_google_pref_cookie()
+ try:
+ params['cookies']['PREF'] = get_google_pref_cookie()
+ except:
+ logger.warning('cannot fetch PREF cookie')
params['cookies']['NID'] = get_google_nid_cookie(google_hostname)
params['google_hostname'] = google_hostname
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 85963a16f..45e7ddbc3 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -2,41 +2,42 @@
Google (Images)
@website https://www.google.com
- @provide-api yes (https://developers.google.com/web-search/docs/),
- deprecated!
+ @provide-api yes (https://developers.google.com/custom-search/)
- @using-api yes
- @results JSON
- @stable yes (but deprecated)
+ @using-api no
+ @results HTML chunks with JSON inside
+ @stable no
@parse url, title, img_src
"""
-from urllib import urlencode, unquote
+from urllib import urlencode
+from urlparse import parse_qs
from json import loads
+from lxml import html
# engine dependent config
categories = ['images']
paging = True
safesearch = True
-# search-url
-url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe={safesearch}&filter=off&{query}'
+search_url = 'https://www.google.com/search'\
+ '?{query}'\
+ '&tbm=isch'\
+ '&ijn=1'\
+ '&start={offset}'
# do search-request
def request(query, params):
- offset = (params['pageno'] - 1) * 8
-
- if params['safesearch'] == 0:
- safesearch = 'off'
- else:
- safesearch = 'on'
+ offset = (params['pageno'] - 1) * 100
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset,
safesearch=safesearch)
+ if safesearch and params['safesearch']:
+ params['url'] += '&' + urlencode({'safe': 'active'})
+
return params
@@ -44,30 +45,28 @@ def request(query, params):
def response(resp):
results = []
- search_res = loads(resp.text)
-
- # return empty array if there are no results
- if not search_res.get('responseData', {}).get('results'):
- return []
+ dom = html.fromstring(resp.text)
# parse results
- for result in search_res['responseData']['results']:
- href = result['originalContextUrl']
- title = result['title']
- if 'url' not in result:
- continue
- thumbnail_src = result['tbUrl']
+ for result in dom.xpath('//div[@data-ved]'):
+ data_url = result.xpath('./a/@href')[0]
+ data_query = {k: v[0] for k, v in parse_qs(data_url.split('?', 1)[1]).iteritems()}
+
+ metadata = loads(result.xpath('./div[@class="rg_meta"]/text()')[0])
+
+ thumbnail_src = metadata['tu']
# http to https
thumbnail_src = thumbnail_src.replace("http://", "https://")
# append result
- results.append({'url': href,
- 'title': title,
- 'content': result['content'],
- 'thumbnail_src': thumbnail_src,
- 'img_src': unquote(result['url']),
+ results.append({'url': data_query['imgrefurl'],
+ 'title': metadata['pt'],
+ 'content': metadata['s'],
+ 'thumbnail_src': metadata['tu'],
+ 'img_src': data_query['imgurl'],
'template': 'images.html'})
# return results
+ print len(results)
return results