From 7937218be66f1fb3eff02bce308a4e5c78ba6672 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 9 Dec 2014 02:36:53 +0100 Subject: Use human readable date For DoB and DoD, wikipedia use a non standard ISO format, not easily readable. Now the date is displayed in an human readable form, using the language setting as locale if available. If not, it uses the default locale. --- searx/engines/wikidata.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index ab799e6ce..bda80cdca 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -1,6 +1,9 @@ import json from requests import get from urllib import urlencode +import locale +import time +import dateutil.parser result_count = 1 wikidata_host = 'https://www.wikidata.org' @@ -35,6 +38,16 @@ def response(resp): language = resp.search_params['language'].split('_')[0] if language == 'all': language = 'en' + + try: + locale.setlocale(locale.LC_ALL, str(resp.search_params['language'])) + except: + try: + locale.setlocale(locale.LC_ALL, 'en_US') + except: + pass + pass + url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids), 'languages': language + '|en'})) @@ -164,10 +177,12 @@ def getDetail(jsonresponse, wikidata_id, language): date_of_birth = get_time(claims, 'P569', None) if date_of_birth is not None: + date_of_birth = dateutil.parser.parse(date_of_birth[8:]).strftime(locale.nl_langinfo(locale.D_FMT)) attributes.append({'label': 'Date of birth', 'value': date_of_birth}) date_of_death = get_time(claims, 'P570', None) if date_of_death is not None: + date_of_death = dateutil.parser.parse(date_of_death[8:]).strftime(locale.nl_langinfo(locale.D_FMT)) attributes.append({'label': 'Date of death', 'value': date_of_death}) if len(attributes) == 0 and len(urls) == 2 and len(description) == 0: -- cgit v1.2.3 From 0059d08f13b1bf64b3f36ab2cbe89d5fec5d727c Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 15 Dec 2014 03:21:25 +0100 Subject: Rework Flickr Engine Everything was redone to use the API. It needs an API key, but it's worth it. Everything works. Title, Image, Content, URL The API allow lots of things. Thumbnails and date will be easy to add when it will be implemented in Searx. Fix asciimoo/searx#126 --- searx/engines/flickr.py | 81 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 26 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 4ec2841dd..8b60aed1d 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -1,54 +1,83 @@ #!/usr/bin/env python +## Flickr (Images) +# +# @website https://www.flickr.com +# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title, thumbnail, img_src +#More info on api-key : https://www.flickr.com/services/apps/create/ + from urllib import urlencode -#from json import loads +from json import loads from urlparse import urljoin from lxml import html from time import time categories = ['images'] -url = 'https://secure.flickr.com/' -search_url = url+'search/?{query}&page={page}' -results_xpath = '//div[@class="view display-item-tile"]/figure/div' +nb_per_page = 15 +paging = True +api_key= None + + +url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' paging = True +def build_flickr_url(user_id, photo_id): + return photo_url.format(userid=user_id,photoid=photo_id) + def request(query, params): - params['url'] = search_url.format(query=urlencode({'text': query}), - page=params['pageno']) - time_string = str(int(time())-3) - params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh' - params['cookies']['xb'] = '421409' - params['cookies']['localization'] = 'en-us' - params['cookies']['flrbp'] = time_string +\ - '-3a8cdb85a427a33efda421fbda347b2eaf765a54' - params['cookies']['flrbs'] = time_string +\ - '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776' - params['cookies']['flrb'] = '9' + params['url'] = url.format(text=urlencode({'text': query}), + api_key=api_key, + nb_per_page=nb_per_page, + page=params['pageno']) return params def response(resp): results = [] - dom = html.fromstring(resp.text) - for result in dom.xpath(results_xpath): - img = result.xpath('.//img') + + search_results = loads(resp.text) - if not img: - continue + # return empty array if there are no results + if not 'photos' in search_results: + return [] + + if not 'photo' in search_results['photos']: + return [] - img = img[0] - img_src = 'https:'+img.attrib.get('src') + photos = search_results['photos']['photo'] - if not img_src: + # parse results + for photo in photos: + if 'url_o' in photo: + img_src = photo['url_o'] + elif 'url_z' in photo: + img_src = photo['url_z'] + else: continue - href = urljoin(url, result.xpath('.//a')[0].attrib.get('href')) - title = img.attrib.get('alt', '') - results.append({'url': href, + url = build_flickr_url(photo['owner'], photo['id']) + + title = photo['title'] + + content = ''+ photo['ownername'] +'
' + + content = content + ' ' + photo['description']['_content'] + '' + + # append result + results.append({'url': url, 'title': title, 'img_src': img_src, + 'content': content, 'template': 'images.html'}) + + # return results return results -- cgit v1.2.3 From 930f724ec639c167d870d716240ac5d4512beba2 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 16 Dec 2014 20:40:03 +0100 Subject: Add an No Api Flickr Engine It uses the webpage json infos to build the results Let the user choose the engine in setting.yml. Noapi active by default + little corrections on Flickr engine --- searx/engines/flickr-noapi.py | 102 ++++++++++++++++++++++++++++++++++++++++++ searx/engines/flickr.py | 5 +-- 2 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 searx/engines/flickr-noapi.py (limited to 'searx/engines') diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py new file mode 100644 index 000000000..b44affec6 --- /dev/null +++ b/searx/engines/flickr-noapi.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +## Flickr (Images) +# +# @website https://www.flickr.com +# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) +# +# @using-api no +# @results HTML +# @stable no +# @parse url, title, thumbnail, img_src + +from urllib import urlencode +from json import loads +from urlparse import urljoin +from lxml import html +import re + +categories = ['images'] + +url = 'https://secure.flickr.com/' +search_url = url+'search/?{query}&page={page}' +photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' +regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) + +paging = True + +def build_flickr_url(user_id, photo_id): + return photo_url.format(userid=user_id,photoid=photo_id) + + +def request(query, params): + params['url'] = search_url.format(query=urlencode({'text': query}), + page=params['pageno']) + return params + + +def response(resp): + results = [] + + matches = regex.search(resp.text) + + if matches == None: + return results + + match = matches.group(1) + search_results = loads(match) + + if not '_data' in search_results: + return [] + + photos = search_results['_data'] + + for photo in photos: + + # In paged configuration, the first pages' photos are represented by a None object + if photo == None: + continue + + # From the biggest to the lowest format + if 'o' in photo['sizes']: + img_src = photo['sizes']['o']['displayUrl'] + elif 'k' in photo['sizes']: + img_src = photo['sizes']['k']['displayUrl'] + elif 'h' in photo['sizes']: + img_src = photo['sizes']['h']['displayUrl'] + elif 'b' in photo['sizes']: + img_src = photo['sizes']['b']['displayUrl'] + elif 'c' in photo['sizes']: + img_src = photo['sizes']['c']['displayUrl'] + elif 'z' in photo['sizes']: + img_src = photo['sizes']['z']['displayUrl'] + elif 'n' in photo['sizes']: + img_src = photo['sizes']['n']['displayUrl'] + elif 'm' in photo['sizes']: + img_src = photo['sizes']['m']['displayUrl'] + elif 't' in photo['sizes']: + img_src = photo['sizes']['to']['displayUrl'] + elif 'q' in photo['sizes']: + img_src = photo['sizes']['q']['displayUrl'] + elif 's' in photo['sizes']: + img_src = photo['sizes']['s']['displayUrl'] + else: + continue + + url = build_flickr_url(photo['owner']['id'], photo['id']) + + title = photo['title'] + + content = ''+ photo['owner']['username'] +'
' + + if 'description' in photo: + content = content + '' + photo['description'] + '' + + # append result + results.append({'url': url, + 'title': title, + 'img_src': img_src, + 'content': content, + 'template': 'images.html'}) + + return results diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 8b60aed1d..2fa5ed7ec 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -13,9 +13,6 @@ from urllib import urlencode from json import loads -from urlparse import urljoin -from lxml import html -from time import time categories = ['images'] @@ -70,7 +67,7 @@ def response(resp): content = ''+ photo['ownername'] +'
' - content = content + ' ' + photo['description']['_content'] + '' + content = content + '' + photo['description']['_content'] + '' # append result results.append({'url': url, -- cgit v1.2.3 From 550232fc21ff2c3ae9a5de3d8b999de66c96171c Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 22 Dec 2014 01:00:16 +0100 Subject: SubtitleSeeker Engine Add the subtitleseeker engine. --- searx/engines/subtitleseeker.py | 59 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 searx/engines/subtitleseeker.py (limited to 'searx/engines') diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py new file mode 100644 index 000000000..346298300 --- /dev/null +++ b/searx/engines/subtitleseeker.py @@ -0,0 +1,59 @@ +## Subtitleseeker (Video) +# +# @website http://www.subtitleseeker.com +# @provide-api no +# +# @using-api no +# @results HTML +# @stable no (HTML can change) +# @parse url, title, content + +from cgi import escape +from urllib import quote_plus +from lxml import html + +# engine dependent config +categories = ['videos'] +paging = True + +# search-url +url = 'http://www.subtitleseeker.com/' +search_url = url+'search/TITLES/{query}&p={pageno}' + +# specific xpath variables +results_xpath = '//div[@class="boxRows"]' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=quote_plus(query), + pageno=params['pageno']) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + # parse results + for result in dom.xpath(results_xpath): + link = result.xpath(".//a")[0] + href = link.attrib.get('href') + title = escape(link.xpath(".//text()")[0]) + + content = result.xpath('.//div[contains(@class,"red")]//text()')[0] + content = content + " - " + content = content + html.tostring(result.xpath('.//div[contains(@class,"grey-web")]')[0], method='text') + + if result.xpath(".//span") != []: + content = content + " - (" + result.xpath(".//span//text()")[0].strip() + ")" + + # append result + results.append({'url': href, + 'title': title, + 'content': escape(content)}) + + # return results + return results -- cgit v1.2.3 From b975418e4ce33aef530f7ad88e100d47d73e4761 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Mon, 22 Dec 2014 14:15:59 +0100 Subject: [fix] flickr engine code cleanup ++ handle missing owner --- searx/engines/flickr-noapi.py | 77 ++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 45 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py index b44affec6..522503b53 100644 --- a/searx/engines/flickr-noapi.py +++ b/searx/engines/flickr-noapi.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -## Flickr (Images) -# +# Flickr (Images) +# # @website https://www.flickr.com -# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) -# +# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) +# # @using-api no # @results HTML # @stable no @@ -12,8 +12,6 @@ from urllib import urlencode from json import loads -from urlparse import urljoin -from lxml import html import re categories = ['images'] @@ -22,11 +20,13 @@ url = 'https://secure.flickr.com/' search_url = url+'search/?{query}&page={page}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) +image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') paging = True + def build_flickr_url(user_id, photo_id): - return photo_url.format(userid=user_id,photoid=photo_id) + return photo_url.format(userid=user_id, photoid=photo_id) def request(query, params): @@ -37,58 +37,45 @@ def request(query, params): def response(resp): results = [] - + matches = regex.search(resp.text) - - if matches == None: + + if matches is None: return results match = matches.group(1) search_results = loads(match) - - if not '_data' in search_results: + + if '_data' not in search_results: return [] - + photos = search_results['_data'] - + for photo in photos: - + # In paged configuration, the first pages' photos are represented by a None object - if photo == None: + if photo is None: continue - + + img_src = None # From the biggest to the lowest format - if 'o' in photo['sizes']: - img_src = photo['sizes']['o']['displayUrl'] - elif 'k' in photo['sizes']: - img_src = photo['sizes']['k']['displayUrl'] - elif 'h' in photo['sizes']: - img_src = photo['sizes']['h']['displayUrl'] - elif 'b' in photo['sizes']: - img_src = photo['sizes']['b']['displayUrl'] - elif 'c' in photo['sizes']: - img_src = photo['sizes']['c']['displayUrl'] - elif 'z' in photo['sizes']: - img_src = photo['sizes']['z']['displayUrl'] - elif 'n' in photo['sizes']: - img_src = photo['sizes']['n']['displayUrl'] - elif 'm' in photo['sizes']: - img_src = photo['sizes']['m']['displayUrl'] - elif 't' in photo['sizes']: - img_src = photo['sizes']['to']['displayUrl'] - elif 'q' in photo['sizes']: - img_src = photo['sizes']['q']['displayUrl'] - elif 's' in photo['sizes']: - img_src = photo['sizes']['s']['displayUrl'] - else: + for image_size in image_sizes: + if image_size in photo['sizes']: + img_src = photo['sizes'][image_size]['displayUrl'] + break + + if not img_src: + continue + + if 'id' not in photo['owner']: continue - + url = build_flickr_url(photo['owner']['id'], photo['id']) title = photo['title'] - - content = ''+ photo['owner']['username'] +'
' - + + content = '' + photo['owner']['username'] + '
' + if 'description' in photo: content = content + '' + photo['description'] + '' @@ -98,5 +85,5 @@ def response(resp): 'img_src': img_src, 'content': content, 'template': 'images.html'}) - + return results -- cgit v1.2.3 From 829948b85df0510e331372bcd60cb31db9c96a5c Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 23 Dec 2014 01:41:25 +0100 Subject: Add language support Allow the user to select a language. It must be written in english, and capitalized, ie : English, French, German, Hungarian... --- searx/engines/subtitleseeker.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index 346298300..c72f81899 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -16,6 +16,8 @@ from lxml import html categories = ['videos'] paging = True +language = "" + # search-url url = 'http://www.subtitleseeker.com/' search_url = url+'search/TITLES/{query}&p={pageno}' @@ -41,6 +43,10 @@ def response(resp): for result in dom.xpath(results_xpath): link = result.xpath(".//a")[0] href = link.attrib.get('href') + + if language is not "": + href = href + language + "/" + title = escape(link.xpath(".//text()")[0]) content = result.xpath('.//div[contains(@class,"red")]//text()')[0] -- cgit v1.2.3 From 2ea55b1c6451e77381bd88dd82f635d48ff1b6fe Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 23 Dec 2014 01:45:39 +0100 Subject: Add language support Allow the user to select a language. It must be written in english, and capitalized, ie : English, French, German, Hungarian... (reverted from commit 829948b85df0510e331372bcd60cb31db9c96a5c) --- searx/engines/subtitleseeker.py | 6 ------ 1 file changed, 6 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index c72f81899..346298300 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -16,8 +16,6 @@ from lxml import html categories = ['videos'] paging = True -language = "" - # search-url url = 'http://www.subtitleseeker.com/' search_url = url+'search/TITLES/{query}&p={pageno}' @@ -43,10 +41,6 @@ def response(resp): for result in dom.xpath(results_xpath): link = result.xpath(".//a")[0] href = link.attrib.get('href') - - if language is not "": - href = href + language + "/" - title = escape(link.xpath(".//text()")[0]) content = result.xpath('.//div[contains(@class,"red")]//text()')[0] -- cgit v1.2.3 From 10e4f6f31631fe51d16b324223525570f3e75850 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 23 Dec 2014 01:51:07 +0100 Subject: Add language support Allow the user to select a language. It must be written in english, and capitalized, ie : English, French, German, Hungarian... --- searx/engines/subtitleseeker.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index 346298300..48790a35c 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -15,6 +15,7 @@ from lxml import html # engine dependent config categories = ['videos'] paging = True +language = "" # search-url url = 'http://www.subtitleseeker.com/' @@ -41,6 +42,10 @@ def response(resp): for result in dom.xpath(results_xpath): link = result.xpath(".//a")[0] href = link.attrib.get('href') + + if language is not "": + href = href + language + "/" + title = escape(link.xpath(".//text()")[0]) content = result.xpath('.//div[contains(@class,"red")]//text()')[0] -- cgit v1.2.3 From 3b3921fc593e49c12ff79df1d6b15d01fe481bec Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Wed, 24 Dec 2014 21:02:26 +0100 Subject: [enh] subtitleseeker: better language handling --- searx/engines/subtitleseeker.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'searx/engines') diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index 48790a35c..2f1636f59 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -11,6 +11,7 @@ from cgi import escape from urllib import quote_plus from lxml import html +from searx.languages import language_codes # engine dependent config categories = ['videos'] @@ -38,13 +39,22 @@ def response(resp): dom = html.fromstring(resp.text) + search_lang = "" + + if resp.search_params['language'] != 'all': + search_lang = [lc[1] + for lc in language_codes + if lc[0][:2] == resp.search_params['language']][0] + # parse results for result in dom.xpath(results_xpath): link = result.xpath(".//a")[0] href = link.attrib.get('href') if language is not "": - href = href + language + "/" + href = href + language + '/' + elif search_lang: + href = href + search_lang + '/' title = escape(link.xpath(".//text()")[0]) -- cgit v1.2.3 From e7e298153678fc0e77e24a3ae3b333b1230136b2 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 28 Dec 2014 22:57:59 +0100 Subject: Digg + Twitter corrections Digg engines, with thumbnails Add pubdate for twitter --- searx/engines/digg.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ searx/engines/twitter.py | 22 +++++++++++----- 2 files changed, 82 insertions(+), 6 deletions(-) create mode 100644 searx/engines/digg.py (limited to 'searx/engines') diff --git a/searx/engines/digg.py b/searx/engines/digg.py new file mode 100644 index 000000000..4ebfe58c1 --- /dev/null +++ b/searx/engines/digg.py @@ -0,0 +1,66 @@ +## Digg (News, Social media) +# +# @website https://digg.com/ +# @provide-api no +# +# @using-api no +# @results HTML (using search portal) +# @stable no (HTML can change) +# @parse url, title, content, publishedDate, thumbnail + +from urllib import quote_plus +from json import loads +from lxml import html +from cgi import escape +from dateutil import parser + +# engine dependent config +categories = ['news', 'social media'] +paging = True + +# search-url +base_url = 'https://digg.com/' +search_url = base_url+'api/search/{query}.json?position={position}&format=html' + +# specific xpath variables +results_xpath = '//article' +link_xpath = './/small[@class="time"]//a' +title_xpath = './/h2//a//text()' +content_xpath = './/p//text()' +pubdate_xpath = './/time' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + params['url'] = search_url.format(position=offset, + query=quote_plus(query)) + return params + + +# get response from search-request +def response(resp): + results = [] + + search_result = loads(resp.text) + + dom = html.fromstring(search_result['html']) + + # parse results + for result in dom.xpath(results_xpath): + url = result.attrib.get('data-contenturl') + thumbnail = result.xpath('.//img')[0].attrib.get('src') + title = ''.join(result.xpath(title_xpath)) + content = escape(''.join(result.xpath(content_xpath))) + publishedDate = parser.parse(result.xpath(pubdate_xpath)[0].attrib.get('datetime')) + + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'thumbnail': thumbnail}) + + # return results + return results diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index 0689150c8..5a7046c83 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -1,6 +1,6 @@ ## Twitter (Social media) # -# @website https://www.bing.com/news +# @website https://twitter.com/ # @provide-api yes (https://dev.twitter.com/docs/using-search) # # @using-api no @@ -14,6 +14,7 @@ from urlparse import urljoin from urllib import urlencode from lxml import html from cgi import escape +from datetime import datetime # engine dependent config categories = ['social media'] @@ -28,6 +29,7 @@ results_xpath = '//li[@data-item-type="tweet"]' link_xpath = './/small[@class="time"]//a' title_xpath = './/span[@class="username js-action-profile-name"]//text()' content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()' +timestamp_xpath = './/span[contains(@class,"_timestamp")]' # do search-request @@ -53,11 +55,19 @@ def response(resp): url = urljoin(base_url, link.attrib.get('href')) title = ''.join(tweet.xpath(title_xpath)) content = escape(''.join(tweet.xpath(content_xpath))) - - # append result - results.append({'url': url, - 'title': title, - 'content': content}) + pubdate = tweet.xpath(timestamp_xpath) + if len(pubdate) > 0: + publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None) + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'publishedDate': publishedDate}) + else: + # append result + results.append({'url': url, + 'title': title, + 'content': content}) # return results return results -- cgit v1.2.3 From 5d977056f7aa216eae09a22c3baaff73546f6ff1 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 29 Dec 2014 21:31:04 +0100 Subject: Flake8 and Twitter corrections Lots of Flake8 corrections Maybe we should change the rule to allow lines of 120 chars. It seems more usable. Big twitter correction : now it outputs the words in right order... --- searx/engines/500px.py | 4 ++-- searx/engines/__init__.py | 4 ++-- searx/engines/digg.py | 3 ++- searx/engines/flickr-noapi.py | 12 +++++++++--- searx/engines/flickr.py | 31 +++++++++++++++++++------------ searx/engines/kickass.py | 5 +++-- searx/engines/searchcode_code.py | 18 +++++++++--------- searx/engines/searchcode_doc.py | 15 +++++++++++---- searx/engines/subtitleseeker.py | 8 ++++++-- searx/engines/twitter.py | 7 ++++--- 10 files changed, 67 insertions(+), 40 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/500px.py b/searx/engines/500px.py index 5d53af32c..3b95619a1 100644 --- a/searx/engines/500px.py +++ b/searx/engines/500px.py @@ -35,9 +35,9 @@ def request(query, params): # get response from search-request def response(resp): results = [] - + dom = html.fromstring(resp.text) - + # parse results for result in dom.xpath('//div[@class="photo"]'): link = result.xpath('.//a')[0] diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index d42339af8..9bc5cdfd4 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -81,7 +81,7 @@ def load_engine(engine_data): if engine_attr.startswith('_'): continue if getattr(engine, engine_attr) is None: - print('[E] Engine config error: Missing attribute "{0}.{1}"'\ + print('[E] Engine config error: Missing attribute "{0}.{1}"' .format(engine.name, engine_attr)) sys.exit(1) @@ -102,7 +102,7 @@ def load_engine(engine_data): if engine.shortcut: # TODO check duplications if engine.shortcut in engine_shortcuts: - print('[E] Engine config error: ambigious shortcut: {0}'\ + print('[E] Engine config error: ambigious shortcut: {0}' .format(engine.shortcut)) sys.exit(1) engine_shortcuts[engine.shortcut] = engine.name diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 4ebfe58c1..241234fdb 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -52,7 +52,8 @@ def response(resp): thumbnail = result.xpath('.//img')[0].attrib.get('src') title = ''.join(result.xpath(title_xpath)) content = escape(''.join(result.xpath(content_xpath))) - publishedDate = parser.parse(result.xpath(pubdate_xpath)[0].attrib.get('datetime')) + pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime') + publishedDate = parser.parse(pubdate) # append result results.append({'url': url, diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py index 522503b53..f90903647 100644 --- a/searx/engines/flickr-noapi.py +++ b/searx/engines/flickr-noapi.py @@ -53,7 +53,8 @@ def response(resp): for photo in photos: - # In paged configuration, the first pages' photos are represented by a None object + # In paged configuration, the first pages' photos + # are represented by a None object if photo is None: continue @@ -74,10 +75,15 @@ def response(resp): title = photo['title'] - content = '' + photo['owner']['username'] + '
' + content = '' +\ + photo['owner']['username'] +\ + '
' if 'description' in photo: - content = content + '' + photo['description'] + '' + content = content +\ + '' +\ + photo['description'] +\ + '' # append result results.append({'url': url, diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 2fa5ed7ec..4dadd80a6 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -1,10 +1,10 @@ #!/usr/bin/env python ## Flickr (Images) -# +# # @website https://www.flickr.com -# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) -# +# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) +# # @using-api yes # @results JSON # @stable yes @@ -18,16 +18,20 @@ categories = ['images'] nb_per_page = 15 paging = True -api_key= None +api_key = None -url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\ + '&api_key={api_key}&{text}&sort=relevance' +\ + '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\ + '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' paging = True + def build_flickr_url(user_id, photo_id): - return photo_url.format(userid=user_id,photoid=photo_id) + return photo_url.format(userid=user_id, photoid=photo_id) def request(query, params): @@ -40,7 +44,7 @@ def request(query, params): def response(resp): results = [] - + search_results = loads(resp.text) # return empty array if there are no results @@ -64,11 +68,14 @@ def response(resp): url = build_flickr_url(photo['owner'], photo['id']) title = photo['title'] - - content = ''+ photo['ownername'] +'
' - - content = content + '' + photo['description']['_content'] + '' - + + content = '' +\ + photo['ownername'] +\ + '
' +\ + '' +\ + photo['description']['_content'] +\ + '' + # append result results.append({'url': url, 'title': title, diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index f1fcd9e1a..16e9d6de6 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -24,7 +24,7 @@ search_url = url + 'search/{search_term}/{pageno}/' # specific xpath variables magnet_xpath = './/a[@title="Torrent magnet link"]' -#content_xpath = './/font[@class="detDesc"]//text()' +content_xpath = './/span[@class="font11px lightgrey block"]' # do search-request @@ -56,7 +56,8 @@ def response(resp): link = result.xpath('.//a[@class="cellMainLink"]')[0] href = urljoin(url, link.attrib['href']) title = ' '.join(link.xpath('.//text()')) - content = escape(html.tostring(result.xpath('.//span[@class="font11px lightgrey block"]')[0], method="text")) + content = escape(html.tostring(result.xpath(content_xpath)[0], + method="text")) seed = result.xpath('.//td[contains(@class, "green")]/text()')[0] leech = result.xpath('.//td[contains(@class, "red")]/text()')[0] diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 2ba0e52f1..0f98352c1 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -11,7 +11,6 @@ from urllib import urlencode from json import loads import cgi -import re # engine dependent config categories = ['it'] @@ -33,7 +32,7 @@ def request(query, params): # get response from search-request def response(resp): results = [] - + search_results = loads(resp.text) # parse results @@ -41,21 +40,22 @@ def response(resp): href = result['url'] title = "" + result['name'] + " - " + result['filename'] content = result['repo'] + "
" - + lines = dict() for line, code in result['lines'].items(): lines[int(line)] = code content = content + '
'
         for line, code in sorted(lines.items()):
-            content = content + '"
-            
+
         content = content + "
' - content = content + str(line) + '' - # Replace every two spaces with ' &nbps;' to keep formatting while allowing the browser to break the line if necessary - content = content + cgi.escape(code).replace('\t', ' ').replace(' ', '  ').replace(' ', '  ') + content = content + '
' + content = content + str(line) + '' + # Replace every two spaces with ' &nbps;' to keep formatting + # while allowing the browser to break the line if necessary + content = content + cgi.escape(code).replace('\t', ' ').replace(' ', '  ').replace(' ', '  ') content = content + "
" - + # append result results.append({'url': href, 'title': title, diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py index e07cbeab9..b5b7159be 100644 --- a/searx/engines/searchcode_doc.py +++ b/searx/engines/searchcode_doc.py @@ -31,15 +31,22 @@ def request(query, params): # get response from search-request def response(resp): results = [] - + search_results = loads(resp.text) # parse results for result in search_results['results']: href = result['url'] - title = "[" + result['type'] + "] " + result['namespace'] + " " + result['name'] - content = '[' + result['type'] + "] " + result['name'] + " " + result['synopsis'] + "
" + result['description'] - + title = "[" + result['type'] + "] " +\ + result['namespace'] +\ + " " + result['name'] + content = '[' +\ + result['type'] + "] " +\ + result['name'] + " " +\ + result['synopsis'] +\ + "
" +\ + result['description'] + # append result results.append({'url': href, 'title': title, diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index 2f1636f59..c413dcf26 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -60,10 +60,14 @@ def response(resp): content = result.xpath('.//div[contains(@class,"red")]//text()')[0] content = content + " - " - content = content + html.tostring(result.xpath('.//div[contains(@class,"grey-web")]')[0], method='text') + text = result.xpath('.//div[contains(@class,"grey-web")]')[0] + content = content + html.tostring(text, method='text') if result.xpath(".//span") != []: - content = content + " - (" + result.xpath(".//span//text()")[0].strip() + ")" + content = content +\ + " - (" +\ + result.xpath(".//span//text()")[0].strip() +\ + ")" # append result results.append({'url': href, diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index 5a7046c83..bd9a8c2fc 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -28,7 +28,7 @@ search_url = base_url+'search?' results_xpath = '//li[@data-item-type="tweet"]' link_xpath = './/small[@class="time"]//a' title_xpath = './/span[@class="username js-action-profile-name"]//text()' -content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()' +content_xpath = './/p[@class="js-tweet-text tweet-text"]' timestamp_xpath = './/span[contains(@class,"_timestamp")]' @@ -54,10 +54,11 @@ def response(resp): link = tweet.xpath(link_xpath)[0] url = urljoin(base_url, link.attrib.get('href')) title = ''.join(tweet.xpath(title_xpath)) - content = escape(''.join(tweet.xpath(content_xpath))) + content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8")) pubdate = tweet.xpath(timestamp_xpath) if len(pubdate) > 0: - publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None) + timestamp = float(pubdate[0].attrib.get('data-time')) + publishedDate = datetime.fromtimestamp(timestamp, None) # append result results.append({'url': url, 'title': title, -- cgit v1.2.3 From 2181c4384ed4d41c795799a345974269327bf641 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Thu, 1 Jan 2015 14:14:56 +0100 Subject: [mod] purge local html_to_text --- searx/engines/duckduckgo_definitions.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 8f81d2c8e..b66d6c0f2 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,6 +1,7 @@ import json from urllib import urlencode from lxml import html +from searx.utils import html_to_text from searx.engines.xpath import extract_text url = 'https://api.duckduckgo.com/'\ @@ -17,11 +18,6 @@ def result_to_text(url, text, htmlResult): return text -def html_to_text(htmlFragment): - dom = html.fromstring(htmlFragment) - return extract_text(dom) - - def request(query, params): # TODO add kl={locale} params['url'] = url.format(query=urlencode({'q': query})) -- cgit v1.2.3 From cc4e17b6686dbefe0d57862e045f98f72a4e58fc Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Fri, 2 Jan 2015 12:33:40 +0100 Subject: [fix] pep8 --- searx/engines/flickr-noapi.py | 6 +++--- searx/engines/subtitleseeker.py | 6 +++--- searx/engines/wikidata.py | 5 ++--- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py index f90903647..aa2fa5d3b 100644 --- a/searx/engines/flickr-noapi.py +++ b/searx/engines/flickr-noapi.py @@ -81,9 +81,9 @@ def response(resp): if 'description' in photo: content = content +\ - '' +\ - photo['description'] +\ - '' + '' +\ + photo['description'] +\ + '' # append result results.append({'url': url, diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index c413dcf26..9aaf1947b 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -65,9 +65,9 @@ def response(resp): if result.xpath(".//span") != []: content = content +\ - " - (" +\ - result.xpath(".//span//text()")[0].strip() +\ - ")" + " - (" +\ + result.xpath(".//span//text()")[0].strip() +\ + ")" # append result results.append({'url': href, diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index bda80cdca..df976ae35 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -2,7 +2,6 @@ import json from requests import get from urllib import urlencode import locale -import time import dateutil.parser result_count = 1 @@ -38,7 +37,7 @@ def response(resp): language = resp.search_params['language'].split('_')[0] if language == 'all': language = 'en' - + try: locale.setlocale(locale.LC_ALL, str(resp.search_params['language'])) except: @@ -47,7 +46,7 @@ def response(resp): except: pass pass - + url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids), 'languages': language + '|en'})) -- cgit v1.2.3 From 4450ed5503ab9f7b4d0dc1849837523bbe3b56dd Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 3 Jan 2015 23:54:02 +0100 Subject: Digg correction Return no result instead of crashing if no result --- searx/engines/digg.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 241234fdb..8c457d6b9 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -44,6 +44,9 @@ def response(resp): search_result = loads(resp.text) + if search_result['html'] == '': + return results + dom = html.fromstring(search_result['html']) # parse results -- cgit v1.2.3 From 3aa3a4633f50fa50693636113a4141e266db90d7 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 3 Jan 2015 23:55:50 +0100 Subject: Few fixes on Vimeo Change URL from https to http Change way of handling text xpath --- searx/engines/vimeo.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index c66c4148a..3949a7299 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -13,24 +13,23 @@ # @todo set content-parameter with correct data from urllib import urlencode -from HTMLParser import HTMLParser from lxml import html -from searx.engines.xpath import extract_text from dateutil import parser +from cgi import escape # engine dependent config categories = ['videos'] paging = True # search-url -base_url = 'https://vimeo.com' +base_url = 'http://vimeo.com' search_url = base_url + '/search/page:{pageno}?{query}' # specific xpath variables +results_xpath = '//div[@id="browse_content"]/ol/li' url_xpath = './a/@href' +title_xpath = './a/div[@class="data"]/p[@class="title"]' content_xpath = './a/img/@src' -title_xpath = './a/div[@class="data"]/p[@class="title"]/text()' -results_xpath = '//div[@id="browse_content"]/ol/li' publishedDate_xpath = './/p[@class="meta"]//attribute::datetime' @@ -39,10 +38,6 @@ def request(query, params): params['url'] = search_url.format(pageno=params['pageno'], query=urlencode({'q': query})) - # TODO required? - params['cookies']['__utma'] =\ - '00000000.000#0000000.0000000000.0000000000.0000000000.0' - return params @@ -52,15 +47,12 @@ def response(resp): dom = html.fromstring(resp.text) - p = HTMLParser() - # parse results for result in dom.xpath(results_xpath): url = base_url + result.xpath(url_xpath)[0] - title = p.unescape(extract_text(result.xpath(title_xpath))) - thumbnail = extract_text(result.xpath(content_xpath)[0]) - publishedDate = parser.parse(extract_text( - result.xpath(publishedDate_xpath)[0])) + title = escape(html.tostring(result.xpath(title_xpath)[0], method='text', encoding='UTF-8').decode("utf-8")) + thumbnail = result.xpath(content_xpath)[0] + publishedDate = parser.parse(result.xpath(publishedDate_xpath)[0]) # append result results.append({'url': url, -- cgit v1.2.3 From 4a195e0b28fdd940e046c442032c816095416fec Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 5 Jan 2015 02:04:23 +0100 Subject: Integrated media in results + Deezer Engine New "embedded" item for the results, allow to give an iframe to display the media directly in the results. Note that the attributes src of the iframes are not set, but instead data-src is set, allowing to only load the iframe when clicked. Deezer engine based on public API (no key). --- searx/engines/dailymotion.py | 15 ++++++++--- searx/engines/deezer.py | 62 ++++++++++++++++++++++++++++++++++++++++++++ searx/engines/soundcloud.py | 14 ++++++++-- searx/engines/vimeo.py | 11 ++++++-- searx/engines/youtube.py | 13 ++++++++-- 5 files changed, 105 insertions(+), 10 deletions(-) create mode 100644 searx/engines/deezer.py (limited to 'searx/engines') diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index a5bffa866..03b1dbb8b 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -6,12 +6,14 @@ # @using-api yes # @results JSON # @stable yes -# @parse url, title, thumbnail +# @parse url, title, thumbnail, publishedDate, embedded # # @todo set content-parameter with correct data from urllib import urlencode from json import loads +from cgi import escape +from datetime import datetime # engine dependent config categories = ['videos'] @@ -20,7 +22,9 @@ language_support = True # search-url # see http://www.dailymotion.com/doc/api/obj-video.html -search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=5&page={pageno}&{query}' # noqa +search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa +embedded_url = '' # do search-request @@ -51,14 +55,17 @@ def response(resp): for res in search_res['list']: title = res['title'] url = res['url'] - #content = res['description'] - content = '' + content = escape(res['description']) thumbnail = res['thumbnail_360_url'] + publishedDate = datetime.fromtimestamp(res['created_time'], None) + embedded = embedded_url.format(videoid=res['id']) results.append({'template': 'videos.html', 'url': url, 'title': title, 'content': content, + 'publishedDate': publishedDate, + 'embedded': embedded, 'thumbnail': thumbnail}) # return results diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py new file mode 100644 index 000000000..6c26b6aeb --- /dev/null +++ b/searx/engines/deezer.py @@ -0,0 +1,62 @@ +## Deezer (Music) +# +# @website https://deezer.com +# @provide-api yes (http://developers.deezer.com/api/) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title, content, embedded + +from json import loads +from urllib import urlencode + +# engine dependent config +categories = ['music'] +paging = True + +# search-url +url = 'http://api.deezer.com/' +search_url = url + 'search?{query}&index={offset}' + +embedded_url = '' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 25 + + params['url'] = search_url.format(query=urlencode({'q': query}), + offset=offset) + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_res = loads(resp.text) + + # parse results + for result in search_res.get('data', []): + if result['type'] == 'track': + print result + title = result['title'] + url = result['link'] + content = result['artist']['name'] +\ + " • " +\ + result['album']['title'] +\ + " • " + result['title'] + embedded = embedded_url.format(audioid=result['id']) + + # append result + results.append({'url': url, + 'title': title, + 'embedded': embedded, + 'content': content}) + + # return results + return results diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 164a569a3..44374af6f 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -6,10 +6,11 @@ # @using-api yes # @results JSON # @stable yes -# @parse url, title, content +# @parse url, title, content, publishedDate, embedded from json import loads -from urllib import urlencode +from urllib import urlencode, quote_plus +from dateutil import parser # engine dependent config categories = ['music'] @@ -27,6 +28,10 @@ search_url = url + 'search?{query}'\ '&linked_partitioning=1'\ '&client_id={client_id}' # noqa +embedded_url = '' + # do search-request def request(query, params): @@ -50,10 +55,15 @@ def response(resp): if result['kind'] in ('track', 'playlist'): title = result['title'] content = result['description'] + publishedDate = parser.parse(result['last_modified']) + uri = quote_plus(result['uri']) + embedded = embedded_url.format(uri=uri) # append result results.append({'url': result['permalink_url'], 'title': title, + 'publishedDate': publishedDate, + 'embedded': embedded, 'content': content}) # return results diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index c66c4148a..fd945b319 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -7,7 +7,7 @@ # @using-api no (TODO, rewrite to api) # @results HTML (using search portal) # @stable no (HTML can change) -# @parse url, title, publishedDate, thumbnail +# @parse url, title, publishedDate, thumbnail, embedded # # @todo rewrite to api # @todo set content-parameter with correct data @@ -33,6 +33,10 @@ title_xpath = './a/div[@class="data"]/p[@class="title"]/text()' results_xpath = '//div[@id="browse_content"]/ol/li' publishedDate_xpath = './/p[@class="meta"]//attribute::datetime' +embedded_url = '' + # do search-request def request(query, params): @@ -56,11 +60,13 @@ def response(resp): # parse results for result in dom.xpath(results_xpath): - url = base_url + result.xpath(url_xpath)[0] + videoid = result.xpath(url_xpath)[0] + url = base_url + videoid title = p.unescape(extract_text(result.xpath(title_xpath))) thumbnail = extract_text(result.xpath(content_xpath)[0]) publishedDate = parser.parse(extract_text( result.xpath(publishedDate_xpath)[0])) + embedded = embedded_url.format(videoid=videoid) # append result results.append({'url': url, @@ -68,6 +74,7 @@ def response(resp): 'content': '', 'template': 'videos.html', 'publishedDate': publishedDate, + 'embedded': embedded, 'thumbnail': thumbnail}) # return results diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py index 973e799f8..59f07c574 100644 --- a/searx/engines/youtube.py +++ b/searx/engines/youtube.py @@ -6,7 +6,7 @@ # @using-api yes # @results JSON # @stable yes -# @parse url, title, content, publishedDate, thumbnail +# @parse url, title, content, publishedDate, thumbnail, embedded from json import loads from urllib import urlencode @@ -19,7 +19,11 @@ language_support = True # search-url base_url = 'https://gdata.youtube.com/feeds/api/videos' -search_url = base_url + '?alt=json&{query}&start-index={index}&max-results=5' # noqa +search_url = base_url + '?alt=json&{query}&start-index={index}&max-results=5' + +embedded_url = '' # do search-request @@ -60,6 +64,8 @@ def response(resp): if url.endswith('&'): url = url[:-1] + videoid = url[32:] + title = result['title']['$t'] content = '' thumbnail = '' @@ -72,12 +78,15 @@ def response(resp): content = result['content']['$t'] + embedded = embedded_url.format(videoid=videoid) + # append result results.append({'url': url, 'title': title, 'content': content, 'template': 'videos.html', 'publishedDate': publishedDate, + 'embedded': embedded, 'thumbnail': thumbnail}) # return results -- cgit v1.2.3 From 0ca04be55dec06c5ef737febb128d3dc36c3b5d7 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 5 Jan 2015 20:24:33 +0100 Subject: Remove print --- searx/engines/deezer.py | 1 - 1 file changed, 1 deletion(-) (limited to 'searx/engines') diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 6c26b6aeb..433ceffa1 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -43,7 +43,6 @@ def response(resp): # parse results for result in search_res.get('data', []): if result['type'] == 'track': - print result title = result['title'] url = result['link'] content = result['artist']['name'] +\ -- cgit v1.2.3 From 299a80a1eb2eecb80f5c50da261a9eab1900b572 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Fri, 9 Jan 2015 04:13:05 +0100 Subject: [enh] using the logger --- searx/engines/__init__.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 9bc5cdfd4..643b107a5 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -22,6 +22,10 @@ from imp import load_source from flask.ext.babel import gettext from operator import itemgetter from searx import settings +from searx import logger + + +logger = logger.getChild('engines') engine_dir = dirname(realpath(__file__)) @@ -81,7 +85,7 @@ def load_engine(engine_data): if engine_attr.startswith('_'): continue if getattr(engine, engine_attr) is None: - print('[E] Engine config error: Missing attribute "{0}.{1}"' + logger.error('Missing engine config attribute: "{0}.{1}"' .format(engine.name, engine_attr)) sys.exit(1) @@ -100,9 +104,8 @@ def load_engine(engine_data): categories['general'].append(engine) if engine.shortcut: - # TODO check duplications if engine.shortcut in engine_shortcuts: - print('[E] Engine config error: ambigious shortcut: {0}' + logger.error('Engine config error: ambigious shortcut: {0}' .format(engine.shortcut)) sys.exit(1) engine_shortcuts[engine.shortcut] = engine.name @@ -199,7 +202,7 @@ def get_engines_stats(): if 'engines' not in settings or not settings['engines']: - print '[E] Error no engines found. Edit your settings.yml' + logger.error('No engines found. Edit your settings.yml') exit(2) for engine_data in settings['engines']: -- cgit v1.2.3 From c8be128e97479ea6c871c4b6fbf014fa8136e708 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Fri, 9 Jan 2015 11:21:46 +0100 Subject: [mod] ignore startpage unicode errors --- searx/engines/startpage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'searx/engines') diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 16da728cd..70b193952 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -66,7 +66,10 @@ def response(resp): continue link = links[0] url = link.attrib.get('href') - title = escape(link.text_content()) + try: + title = escape(link.text_content()) + except UnicodeDecodeError: + continue # block google-ad url's if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url): -- cgit v1.2.3