diff options
Diffstat (limited to 'searx/engines/digg.py')
| -rw-r--r-- | searx/engines/digg.py | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/searx/engines/digg.py b/searx/engines/digg.py new file mode 100644 index 000000000..073410eb0 --- /dev/null +++ b/searx/engines/digg.py @@ -0,0 +1,69 @@ +""" + Digg (News, Social media) + + @website https://digg.com/ + @provide-api no + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, publishedDate, thumbnail +""" + +import random +import string +from dateutil import parser +from json import loads +from lxml import html +from searx.url_utils import urlencode +from datetime import datetime + +# engine dependent config +categories = ['news', 'social media'] +paging = True + +# search-url +base_url = 'https://digg.com/' +search_url = base_url + 'api/search/?{query}&from={position}&size=20&format=html' + +# specific xpath variables +results_xpath = '//article' +link_xpath = './/small[@class="time"]//a' +title_xpath = './/h2//a//text()' +content_xpath = './/p//text()' +pubdate_xpath = './/time' + +digg_cookie_chars = string.ascii_uppercase + string.ascii_lowercase +\ + string.digits + "+_" + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 20 + params['url'] = search_url.format(position=offset, + query=urlencode({'q': query})) + params['cookies']['frontend.auid'] = ''.join(random.choice( + digg_cookie_chars) for _ in range(22)) + return params + + +# get response from search-request +def response(resp): + results = [] + + search_result = loads(resp.text) + + # parse results + for result in search_result['mapped']: + + published = datetime.strptime(result['created']['ISO'], "%Y-%m-%d %H:%M:%S") + # append result + results.append({'url': result['url'], + 'title': result['title'], + 'content': result['excerpt'], + 'template': 'videos.html', + 'publishedDate': published, + 'thumbnail': result['images']['thumbImage']}) + + # return results + return results |