diff options
Diffstat (limited to 'searx/engines')
| -rwxr-xr-x | searx/engines/base.py | 2 |
| -rw-r--r-- | searx/engines/google_images.py | 41 |
| -rw-r--r-- | searx/engines/piratebay.py | 2 |
| -rw-r--r-- | searx/engines/wikidata.py | 3 |
4 files changed, 20 insertions, 28 deletions
diff --git a/searx/engines/base.py b/searx/engines/base.py index be0b7d247..f1b1cf671 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -55,7 +55,7 @@ shorcut_dict = { def request(query, params): # replace shortcuts with API advanced search keywords for key in shorcut_dict.keys(): - query = re.sub(str(key), str(shorcut_dict[key]), query) + query = re.sub(key, shorcut_dict[key], str(query)) # basic search offset = (params['pageno'] - 1) * number_of_results diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index a380170ca..504831a10 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -13,7 +13,7 @@ from datetime import date, timedelta from json import loads from lxml import html -from searx.url_utils import urlencode +from searx.url_utils import urlencode, urlparse, parse_qs # engine dependent config @@ -25,10 +25,9 @@ number_of_results = 100 search_url = 'https://www.google.com/search'\ '?{query}'\ - '&asearch=ichunk'\ - '&async=_id:rg_s,_pms:s'\ '&tbm=isch'\ - '&yv=2'\ + '&gbv=1'\ + '&sa=G'\ '&{search_options}' time_range_attr = "qdr:{range}" time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}" @@ -66,30 +65,22 @@ def request(query, params): def response(resp): results = [] - g_result = loads(resp.text) - - dom = html.fromstring(g_result[1][1]) + dom = html.fromstring(resp.text) # parse results - for result in dom.xpath('//div[@data-ved]'): - - try: - metadata = loads(''.join(result.xpath('./div[contains(@class, "rg_meta")]/text()'))) - except: - continue - - thumbnail_src = metadata['tu'] - - # http to https - thumbnail_src = thumbnail_src.replace("http://", "https://") - + for img in dom.xpath('//a'): + r = { + 'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')), + 'content': '', + 'template': 'images.html', + } + url = urlparse(img.xpath('.//@href')[0]) + query = parse_qs(url.query) + r['url'] = query['imgrefurl'][0] + r['img_src'] = query['imgurl'][0] + 
r['thumbnail_src'] = r['img_src'] # append result - results.append({'url': metadata['ru'], - 'title': metadata['pt'], - 'content': metadata['s'], - 'thumbnail_src': thumbnail_src, - 'img_src': metadata['ou'], - 'template': 'images.html'}) + results.append(r) # return results return results diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index a5af8d824..2f3f22a97 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -18,7 +18,7 @@ categories = ['videos', 'music', 'files'] paging = True # search-url -url = 'https://thepiratebay.se/' +url = 'https://thepiratebay.org/' search_url = url + 'search/{search_term}/{pageno}/99/{search_type}' # piratebay specific type-definitions diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index fe53609c1..c315b30da 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -54,6 +54,7 @@ value_xpath = './/div[contains(@class,"wikibase-statementview-mainsnak")]'\ + '/*/div[contains(@class,"wikibase-snakview-value")]' language_fallback_xpath = '//sup[contains(@class,"wb-language-fallback-indicator")]' calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]' +media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a' def request(query, params): @@ -313,7 +314,7 @@ def add_image(result): for property_id in property_ids: image = result.xpath(property_xpath.replace('{propertyid}', property_id)) if image: - image_name = image[0].xpath(value_xpath) + image_name = image[0].xpath(media_xpath) image_src = url_image.replace('{filename}', extract_text(image_name[0])) return image_src |