From 0059d08f13b1bf64b3f36ab2cbe89d5fec5d727c Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 15 Dec 2014 03:21:25 +0100 Subject: Rework Flickr Engine Everything was redone to use the API. It needs an API key, but it's worth it. Everything works. Title, Image, Content, URL The API allow lots of things. Thumbnails and date will be easy to add when it will be implemented in Searx. Fix asciimoo/searx#126 --- searx/engines/flickr.py | 81 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 26 deletions(-) (limited to 'searx/engines/flickr.py') diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 4ec2841dd..8b60aed1d 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -1,54 +1,83 @@ #!/usr/bin/env python +## Flickr (Images) +# +# @website https://www.flickr.com +# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title, thumbnail, img_src +#More info on api-key : https://www.flickr.com/services/apps/create/ + from urllib import urlencode -#from json import loads +from json import loads from urlparse import urljoin from lxml import html from time import time categories = ['images'] -url = 'https://secure.flickr.com/' -search_url = url+'search/?{query}&page={page}' -results_xpath = '//div[@class="view display-item-tile"]/figure/div' +nb_per_page = 15 +paging = True +api_key= None + + +url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' paging = True +def build_flickr_url(user_id, photo_id): + return photo_url.format(userid=user_id,photoid=photo_id) + def request(query, params): - params['url'] = search_url.format(query=urlencode({'text': query}), - page=params['pageno']) - time_string = str(int(time())-3) - params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh' - params['cookies']['xb'] = '421409' - params['cookies']['localization'] = 'en-us' - params['cookies']['flrbp'] = time_string +\ - '-3a8cdb85a427a33efda421fbda347b2eaf765a54' - params['cookies']['flrbs'] = time_string +\ - '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776' - params['cookies']['flrb'] = '9' + params['url'] = url.format(text=urlencode({'text': query}), + api_key=api_key, + nb_per_page=nb_per_page, + page=params['pageno']) return params def response(resp): results = [] - dom = html.fromstring(resp.text) - for result in dom.xpath(results_xpath): - img = result.xpath('.//img') + + search_results = loads(resp.text) - if not img: - continue + # return empty array if there are no results + if not 'photos' in search_results: + return [] + + if not 'photo' in search_results['photos']: + return [] - img = img[0] - img_src = 'https:'+img.attrib.get('src') + photos = search_results['photos']['photo'] - if not img_src: + # parse results + for photo in photos: + if 'url_o' in photo: + img_src = photo['url_o'] + elif 'url_z' in photo: + img_src = photo['url_z'] + else: continue - href = urljoin(url, result.xpath('.//a')[0].attrib.get('href')) - title = img.attrib.get('alt', '') - results.append({'url': href, + url = build_flickr_url(photo['owner'], photo['id']) + + title = photo['title'] + + content = ''+ photo['ownername'] +'
' + + content = content + ' ' + photo['description']['_content'] + '' + + # append result + results.append({'url': url, 'title': title, 'img_src': img_src, + 'content': content, 'template': 'images.html'}) + + # return results return results -- cgit v1.2.3 From 930f724ec639c167d870d716240ac5d4512beba2 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 16 Dec 2014 20:40:03 +0100 Subject: Add an No Api Flickr Engine It uses the webpage json infos to build the results Let the user choose the engine in setting.yml. Noapi active by default + little corrections on Flickr engine --- searx/engines/flickr.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'searx/engines/flickr.py') diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 8b60aed1d..2fa5ed7ec 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -13,9 +13,6 @@ from urllib import urlencode from json import loads -from urlparse import urljoin -from lxml import html -from time import time categories = ['images'] @@ -70,7 +67,7 @@ def response(resp): content = ''+ photo['ownername'] +'
' - content = content + ' ' + photo['description']['_content'] + '' + content = content + '' + photo['description']['_content'] + '' # append result results.append({'url': url, -- cgit v1.2.3 From 5d977056f7aa216eae09a22c3baaff73546f6ff1 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 29 Dec 2014 21:31:04 +0100 Subject: Flake8 and Twitter corrections Lots of Flake8 corrections Maybe we should change the rule to allow lines of 120 chars. It seems more usable. Big twitter correction : now it outputs the words in right order... --- searx/engines/flickr.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'searx/engines/flickr.py') diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 2fa5ed7ec..4dadd80a6 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -1,10 +1,10 @@ #!/usr/bin/env python ## Flickr (Images) -# +# # @website https://www.flickr.com -# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) -# +# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) +# # @using-api yes # @results JSON # @stable yes @@ -18,16 +18,20 @@ categories = ['images'] nb_per_page = 15 paging = True -api_key= None +api_key = None -url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\ + '&api_key={api_key}&{text}&sort=relevance' +\ + '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\ + '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' paging = True + def build_flickr_url(user_id, photo_id): - return photo_url.format(userid=user_id,photoid=photo_id) + return photo_url.format(userid=user_id, photoid=photo_id) def request(query, params): @@ -40,7 +44,7 @@ def request(query, params): def response(resp): results = [] - + search_results = loads(resp.text) # return empty array if there are no results @@ -64,11 +68,14 @@ def response(resp): url = build_flickr_url(photo['owner'], photo['id']) title = photo['title'] - - content = ''+ photo['ownername'] +'
' - - content = content + '' + photo['description']['_content'] + '' - + + content = '' +\ + photo['ownername'] +\ + '
' +\ + '' +\ + photo['description']['_content'] +\ + '' + # append result results.append({'url': url, 'title': title, -- cgit v1.2.3