From f965c978222cf48e8dd4b7dd6c9a28ccca9bc62f Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 31 May 2015 00:25:59 +0200 Subject: Adds two engines : Youtube with or without API The API needs an API_KEY The NOAPI doesn't have the published dates. --- searx/engines/youtube_api.py | 83 ++++++++++++++++++++++++++++++++++++++++++ searx/engines/youtube_noapi.py | 72 ++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 searx/engines/youtube_api.py create mode 100644 searx/engines/youtube_noapi.py (limited to 'searx/engines') diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py new file mode 100644 index 000000000..8fd939a25 --- /dev/null +++ b/searx/engines/youtube_api.py @@ -0,0 +1,83 @@ +# Youtube (Videos) +# +# @website https://www.youtube.com/ +# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title, content, publishedDate, thumbnail, embedded + +from json import loads +from urllib import urlencode +from dateutil import parser + +# engine dependent config +categories = ['videos', 'music'] +paging = False +language_support = True +api_key = None + +# search-url +base_url = 'https://www.googleapis.com/youtube/v3/search' +search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}' + +embedded_url = '' + +base_youtube_url = 'https://www.youtube.com/watch?v=' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'q': query}), + api_key=api_key) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0] + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'items' not in search_results: + return [] + + # parse results + for result in search_results['items']: + videoid = result['id']['videoId'] + + title = result['snippet']['title'] + content = '' + thumbnail = '' + + pubdate = result['snippet']['publishedAt'] + publishedDate = parser.parse(pubdate) + + thumbnail = result['snippet']['thumbnails']['high']['url'] + + content = result['snippet']['description'] + + url = base_youtube_url + videoid + + embedded = embedded_url.format(videoid=videoid) + + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail}) + + # return results + return results diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py new file mode 100644 index 000000000..f78e43f0f --- /dev/null +++ b/searx/engines/youtube_noapi.py @@ -0,0 +1,72 @@ +# Youtube (Videos) +# +# @website https://www.youtube.com/ +# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) +# +# @using-api no +# @results HTML +# @stable no +# @parse url, title, content, publishedDate, thumbnail, embedded + +from urllib import quote_plus +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['videos', 'music'] +paging = True +language_support = False + +# search-url +base_url = 'https://www.youtube.com/results' +search_url = base_url + '?search_query={query}&page={page}' + +embedded_url = '' + +base_youtube_url = 'https://www.youtube.com/watch?v=' + +# specific xpath variables +results_xpath = "//ol/li/div[contains(@class, 'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]" +url_xpath = './/h3/a/@href' +title_xpath = './/div[@class="yt-lockup-content"]/h3/a' +content_xpath = './/div[@class="yt-lockup-content"]/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=quote_plus(query), + page=params['pageno']) + + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + # parse results + for result in dom.xpath(results_xpath): + videoid = result.xpath('@data-context-item-id')[0] + + url = base_youtube_url + videoid + thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg' + + title = extract_text(result.xpath(title_xpath)[0]) + content = extract_text(result.xpath(content_xpath)[0]) + + embedded = embedded_url.format(videoid=videoid) + + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'embedded': embedded, + 'thumbnail': thumbnail}) + + # return results + return results -- cgit v1.2.3 From 884eeb8541e0a4cf3d65c2a17e1c2f788cab7fb1 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 1 Jun 2015 00:00:32 +0200 Subject: New Qwant engines - Web - Images - News - Social media --- searx/engines/qwant.py | 66 ++++++++++++++++++++++++++++++++++++++++ searx/engines/qwant_images.py | 70 +++++++++++++++++++++++++++++++++++++++++++ searx/engines/qwant_news.py | 69 ++++++++++++++++++++++++++++++++++++++++++ searx/engines/qwant_social.py | 69 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 274 insertions(+) create mode 100644 searx/engines/qwant.py create mode 100644 searx/engines/qwant_images.py create mode 100644 searx/engines/qwant_news.py create mode 100644 searx/engines/qwant_social.py (limited to 'searx/engines') diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py new file mode 100644 index 000000000..91c12a19e --- /dev/null +++ b/searx/engines/qwant.py @@ -0,0 +1,66 @@ +""" + Qwant (Web) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads + +# engine dependent config +categories = ['general'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/web?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + + # append result + results.append({'title': title, + 'content': content, + 'url': res_url}) + + # return results + return results diff --git a/searx/engines/qwant_images.py b/searx/engines/qwant_images.py new file mode 100644 index 000000000..1c1753389 --- /dev/null +++ b/searx/engines/qwant_images.py @@ -0,0 +1,70 @@ +""" + Qwant (Images) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads + +# engine dependent config +categories = ['images'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/images?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + thumbnail_src = result['thumbnail'] + img_src = result['media'] + + # append result + results.append({'template': 'images.html', + 'url': res_url, + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'img_src': img_src}) + + # return results + return results diff --git a/searx/engines/qwant_news.py b/searx/engines/qwant_news.py new file mode 100644 index 000000000..c4d5be5d3 --- /dev/null +++ b/searx/engines/qwant_news.py @@ -0,0 +1,69 @@ +""" + Qwant (News) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads +from datetime import datetime + +# engine dependent config +categories = ['news'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/news?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + published_date = datetime.fromtimestamp(result['date'], None) + + # append result + results.append({'url': res_url, + 'title': title, + 'publishedDate': published_date, + 'content': content}) + + # return results + return results diff --git a/searx/engines/qwant_social.py b/searx/engines/qwant_social.py new file mode 100644 index 000000000..474dfac02 --- /dev/null +++ b/searx/engines/qwant_social.py @@ -0,0 +1,69 @@ +""" + Qwant (social media) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads +from datetime import datetime + +# engine dependent config +categories = ['social media'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/social?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + published_date = datetime.fromtimestamp(result['date'], None) + + # append result + results.append({'url': res_url, + 'title': title, + 'content': content, + 'publishedDate': published_date}) + + # return results + return results -- cgit v1.2.3 From f05087b93ac1ebef3bdacd353524bac0d8041832 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 2 Jun 2015 20:36:58 +0200 Subject: Refactor Use only one engine for the four search from Qwant --- searx/engines/qwant.py | 38 ++++++++++++++++++----- searx/engines/qwant_images.py | 70 ------------------------------------------- searx/engines/qwant_news.py | 69 ------------------------------------------ searx/engines/qwant_social.py | 69 ------------------------------------------ 4 files changed, 30 insertions(+), 216 deletions(-) delete mode 100644 searx/engines/qwant_images.py delete mode 100644 searx/engines/qwant_news.py delete mode 100644 searx/engines/qwant_social.py (limited to 'searx/engines') diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 91c12a19e..38bafb043 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -1,5 +1,5 @@ """ - Qwant (Web) + Qwant (Web, Images, News, Social) @website https://qwant.com/ @provide-api not officially (https://api.qwant.com/api/search/) @@ -12,21 +12,25 @@ from urllib import urlencode from json import loads +from datetime import datetime # engine dependent config -categories = ['general'] +categories = None paging = True language_support = True +search_url_keyword = None + # search-url -url = 'https://api.qwant.com/api/search/web?count=10&offset={offset}&f=&{query}' +url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}' # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 - params['url'] = url.format(query=urlencode({'q': query}), + params['url'] = url.format(keyword=search_url_keyword, + query=urlencode({'q': query}), offset=offset) # add language tag if specified @@ -57,10 +61,28 @@ def response(resp): res_url = result['url'] content = result['desc'] - # append result - results.append({'title': title, - 'content': content, - 'url': res_url}) + if search_url_keyword == 'web': + results.append({'title': title, + 'content': content, + 'url': res_url}) + + elif search_url_keyword == 'images': + thumbnail_src = result['thumbnail'] + img_src = result['media'] + results.append({'template': 'images.html', + 'url': res_url, + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'img_src': img_src}) + + elif search_url_keyword == 'news' or search_url_keyword == 'social': + published_date = datetime.fromtimestamp(result['date'], None) + + results.append({'url': res_url, + 'title': title, + 'publishedDate': published_date, + 'content': content}) # return results return results diff --git a/searx/engines/qwant_images.py b/searx/engines/qwant_images.py deleted file mode 100644 index 1c1753389..000000000 --- a/searx/engines/qwant_images.py +++ /dev/null @@ -1,70 +0,0 @@ -""" - Qwant (Images) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads - -# engine dependent config -categories = ['images'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/images?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - thumbnail_src = result['thumbnail'] - img_src = result['media'] - - # append result - results.append({'template': 'images.html', - 'url': res_url, - 'title': title, - 'content': '', - 'thumbnail_src': thumbnail_src, - 'img_src': img_src}) - - # return results - return results diff --git a/searx/engines/qwant_news.py b/searx/engines/qwant_news.py deleted file mode 100644 index c4d5be5d3..000000000 --- a/searx/engines/qwant_news.py +++ /dev/null @@ -1,69 +0,0 @@ -""" - Qwant (News) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads -from datetime import datetime - -# engine dependent config -categories = ['news'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/news?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - content = result['desc'] - published_date = datetime.fromtimestamp(result['date'], None) - - # append result - results.append({'url': res_url, - 'title': title, - 'publishedDate': published_date, - 'content': content}) - - # return results - return results diff --git a/searx/engines/qwant_social.py b/searx/engines/qwant_social.py deleted file mode 100644 index 474dfac02..000000000 --- a/searx/engines/qwant_social.py +++ /dev/null @@ -1,69 +0,0 @@ -""" - Qwant (social media) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads -from datetime import datetime - -# engine dependent config -categories = ['social media'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/social?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - content = result['desc'] - published_date = datetime.fromtimestamp(result['date'], None) - - # append result - results.append({'url': res_url, - 'title': title, - 'content': content, - 'publishedDate': published_date}) - - # return results - return results -- cgit v1.2.3 From e0774c849c48373c7a49515d5d769c5868596494 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 2 Jun 2015 22:11:47 +0200 Subject: Removed the keywords from the settings in qwant engine --- searx/engines/qwant.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 38bafb043..872bd4e95 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -19,7 +19,10 @@ categories = None paging = True language_support = True -search_url_keyword = None +category_to_keyword = {'general': 'web', + 'images': 'images', + 'news': 'news', + 'social media': 'social'} # search-url url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}' @@ -29,9 +32,15 @@ url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{q def request(query, params): offset = (params['pageno'] - 1) * 10 - params['url'] = url.format(keyword=search_url_keyword, - query=urlencode({'q': query}), - offset=offset) + if categories[0] and categories[0] in category_to_keyword: + + params['url'] = url.format(keyword=category_to_keyword[categories[0]], + query=urlencode({'q': query}), + offset=offset) + else: + params['url'] = url.format(keyword='web', + query=urlencode({'q': query}), + offset=offset) # add language tag if specified if params['language'] != 'all': @@ -61,12 +70,12 @@ def response(resp): res_url = result['url'] content = result['desc'] - if search_url_keyword == 'web': + if category_to_keyword.get(categories[0], '') == 'web': results.append({'title': title, 'content': content, 'url': res_url}) - elif search_url_keyword == 'images': + elif category_to_keyword.get(categories[0], '') == 'images': thumbnail_src = result['thumbnail'] img_src = result['media'] results.append({'template': 'images.html', @@ -76,7 +85,8 @@ def response(resp): 'thumbnail_src': thumbnail_src, 'img_src': img_src}) - elif search_url_keyword == 'news' or search_url_keyword == 'social': + elif (category_to_keyword.get(categories[0], '') == 'news' or + category_to_keyword.get(categories[0], '') == 'social'): published_date = datetime.fromtimestamp(result['date'], None) results.append({'url': res_url, -- cgit v1.2.3