From e7e298153678fc0e77e24a3ae3b333b1230136b2 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 28 Dec 2014 22:57:59 +0100
Subject: Digg + Twitter corrections: Digg engines, with thumbnails; add pubdate
 for Twitter

---
 searx/engines/twitter.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'searx/engines/twitter.py')

diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 0689150c8..5a7046c83 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -1,6 +1,6 @@
 ## Twitter (Social media)
 #
-# @website     https://www.bing.com/news
+# @website     https://twitter.com/
 # @provide-api yes (https://dev.twitter.com/docs/using-search)
 #
 # @using-api   no
@@ -14,6 +14,7 @@ from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
 from cgi import escape
+from datetime import datetime
 
 # engine dependent config
 categories = ['social media']
@@ -28,6 +29,7 @@ results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+timestamp_xpath = './/span[contains(@class,"_timestamp")]'
 
 
 # do search-request
@@ -53,11 +55,19 @@ def response(resp):
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
         content = escape(''.join(tweet.xpath(content_xpath)))
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'content': content})
+        pubdate = tweet.xpath(timestamp_xpath)
+        if len(pubdate) > 0:
+            publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content,
+                            'publishedDate': publishedDate})
+        else:
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content})
 
     # return results
     return results
-- 
cgit v1.2.3


From 5d977056f7aa216eae09a22c3baaff73546f6ff1 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Mon, 29 Dec 2014 21:31:04 +0100
Subject: Flake8 and Twitter corrections. Lots of Flake8 corrections. Maybe we
 should change the rule to allow lines of 120 chars; it seems more usable.

Big Twitter correction: now it outputs the words in the right order.
---
 searx/engines/twitter.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'searx/engines/twitter.py')

diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 5a7046c83..bd9a8c2fc 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -28,7 +28,7 @@ search_url = base_url+'search?'
 results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
-content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]'
 timestamp_xpath = './/span[contains(@class,"_timestamp")]'
 
 
@@ -54,10 +54,11 @@ def response(resp):
         link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
-        content = escape(''.join(tweet.xpath(content_xpath)))
+        content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
         pubdate = tweet.xpath(timestamp_xpath)
         if len(pubdate) > 0:
-            publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
+            timestamp = float(pubdate[0].attrib.get('data-time'))
+            publishedDate = datetime.fromtimestamp(timestamp, None)
             # append result
             results.append({'url': url,
                             'title': title,
-- 
cgit v1.2.3