diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2014-09-01 18:30:55 +0200 |
|---|---|---|
| committer | Adam Tauber <asciimoo@gmail.com> | 2014-09-01 18:30:55 +0200 |
| commit | f36d1e28fae212b8b8640324d2e787b73305e2d2 (patch) | |
| tree | 3ea188d416f972d1f7f1f4db7e218d5b491f93f8 /searx/engines/google_news.py | |
| parent | 55dfb305a0057b8e94706ae152bb61d07772f334 (diff) | |
| parent | 58a443be29e9fda5273af5118d72ff512ecb9e08 (diff) | |
Merge pull request #88 from pointhi/engines
update and fix search engines
Diffstat (limited to 'searx/engines/google_news.py')
| -rw-r--r-- | searx/engines/google_news.py | 31 |
1 file changed, 25 insertions, 6 deletions
## Google (News)
#
# @website     https://www.google.com
# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
#
# @using-api   yes
# @results     JSON
# @stable      yes (but deprecated)
# @parse       url, title, content, publishedDate

from urllib import urlencode
from json import loads
from dateutil import parser

# engine dependent config
categories = ['news']
paging = True
language_support = True

# search-url
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa


# do search-request
def request(query, params):
    # API pages in steps of 8 results (rsz=large)
    offset = (params['pageno'] - 1) * 8

    # default to en-US; searx stores languages as e.g. 'de_DE',
    # the API expects 'de-DE'
    language = 'en-US'
    if params['language'] != 'all':
        language = params['language'].replace('_', '-')

    params['url'] = search_url.format(offset=offset,
                                      query=urlencode({'q': query}),
                                      language=language)

    return params


# get response from search-request
def response(resp):
    results = []

    search_res = loads(resp.text)

    # return empty array if there are no results
    if not search_res.get('responseData', {}).get('results'):
        return []

    # parse results
    for result in search_res['responseData']['results']:
        # parse publishedDate (e.g. "Mon, 10 Mar 2014 16:26:15 -0700")
        publishedDate = parser.parse(result['publishedDate'])

        # append result
        results.append({'url': result['unescapedUrl'],
                        'title': result['titleNoFormatting'],
                        'publishedDate': publishedDate,
                        'content': result['content']})

    # return results
    return results