summary refs log tree commit diff
path: root/searx/engines/google_news.py
diff options
context:
space:
mode:
authorAdam Tauber <asciimoo@gmail.com>2014-09-01 18:30:55 +0200
committerAdam Tauber <asciimoo@gmail.com>2014-09-01 18:30:55 +0200
commitf36d1e28fae212b8b8640324d2e787b73305e2d2 (patch)
tree3ea188d416f972d1f7f1f4db7e218d5b491f93f8 /searx/engines/google_news.py
parent55dfb305a0057b8e94706ae152bb61d07772f334 (diff)
parent58a443be29e9fda5273af5118d72ff512ecb9e08 (diff)
Merge pull request #88 from pointhi/engines
update and fix search engines
Diffstat (limited to 'searx/engines/google_news.py')
-rw-r--r--searx/engines/google_news.py31
1 file changed, 25 insertions, 6 deletions
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 72b7a0661..becc7e21d 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -1,43 +1,62 @@
-#!/usr/bin/env python
+## Google (News)
+#
+# @website https://www.google.com
+# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
+#
+# @using-api yes
+# @results JSON
+# @stable yes (but deprecated)
+# @parse url, title, content, publishedDate
from urllib import urlencode
from json import loads
from dateutil import parser
# engine dependent config
categories = ['news']
paging = True
language_support = True

# search-url
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
# do search-request
def request(query, params):
    """Fill in params['url'] for a Google News AJAX search request.

    Pagination is 8 results per page (rsz=large); the language is taken
    from the search parameters unless it is 'all', which maps to en-US.
    """
    # first result index for the requested page
    offset = (params['pageno'] - 1) * 8

    if params['language'] != 'all':
        # API expects e.g. 'en-US', searx uses 'en_US'
        language = params['language'].replace('_', '-')
    else:
        language = 'en-US'

    params['url'] = search_url.format(
        offset=offset,
        query=urlencode({'q': query}),
        language=language)

    return params
# get response from search-request
def response(resp):
    """Parse the JSON answer of the (deprecated) Google News AJAX API.

    Returns a list of result dicts with url, title, publishedDate and
    content; returns an empty list when the response carries no results.
    """
    search_res = loads(resp.text)
    items = search_res.get('responseData', {}).get('results')

    # no results -> empty list
    if not items:
        return []

    # each API entry maps onto one searx result dict; publishedDate
    # arrives as e.g. "Mon, 10 Mar 2014 16:26:15 -0700"
    return [{'url': item['unescapedUrl'],
             'title': item['titleNoFormatting'],
             'publishedDate': parser.parse(item['publishedDate']),
             'content': item['content']}
            for item in items]