diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2015-01-01 14:10:59 +0100 |
|---|---|---|
| committer | Adam Tauber <asciimoo@gmail.com> | 2015-01-01 14:10:59 +0100 |
| commit | 469e08881ee17d8a180d0c0741c1552a29108f0e (patch) | |
| tree | 59db47065d54fdde5576babc19155f128359386f /searx/engines/twitter.py | |
| parent | c7cbd38fcc60601dd3b41df8a3a234c079f5dc0b (diff) | |
| parent | 5d977056f7aa216eae09a22c3baaff73546f6ff1 (diff) | |
Merge pull request #165 from Cqoicebordel/Moar-engines
Moar engines
Diffstat (limited to 'searx/engines/twitter.py')
| -rw-r--r-- | searx/engines/twitter.py | 27 |
1 files changed, 19 insertions, 8 deletions
```diff
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 0689150c8..bd9a8c2fc 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -1,6 +1,6 @@
 ## Twitter (Social media)
 #
-# @website https://www.bing.com/news
+# @website https://twitter.com/
 # @provide-api yes (https://dev.twitter.com/docs/using-search)
 #
 # @using-api no
@@ -14,6 +14,7 @@ from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
 from cgi import escape
+from datetime import datetime
 
 # engine dependent config
 categories = ['social media']
@@ -27,7 +28,8 @@ search_url = base_url+'search?'
 results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
-content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]'
+timestamp_xpath = './/span[contains(@class,"_timestamp")]'
 
 
 # do search-request
@@ -52,12 +54,21 @@ def response(resp):
         link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
-        content = escape(''.join(tweet.xpath(content_xpath)))
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'content': content})
+        content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
+        pubdate = tweet.xpath(timestamp_xpath)
+        if len(pubdate) > 0:
+            timestamp = float(pubdate[0].attrib.get('data-time'))
+            publishedDate = datetime.fromtimestamp(timestamp, None)
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content,
+                            'publishedDate': publishedDate})
+        else:
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content})
 
     # return results
     return results
```