From 7937218be66f1fb3eff02bce308a4e5c78ba6672 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Tue, 9 Dec 2014 02:36:53 +0100
Subject: Use human readable date

For DoB and DoD, Wikipedia uses a non-standard ISO format that is not easily readable.
Now the date is displayed in a human-readable form, using the language setting as the locale if available. If not, it uses the default locale.
---
 searx/engines/wikidata.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'searx/engines')

diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index ab799e6ce..bda80cdca 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -1,6 +1,9 @@
 import json
 from requests import get
 from urllib import urlencode
+import locale
+import time
+import dateutil.parser
 
 result_count = 1
 wikidata_host = 'https://www.wikidata.org'
@@ -35,6 +38,16 @@ def response(resp):
     language = resp.search_params['language'].split('_')[0]
     if language == 'all':
         language = 'en'
+    
+    try:
+        locale.setlocale(locale.LC_ALL, str(resp.search_params['language']))
+    except:
+        try:
+            locale.setlocale(locale.LC_ALL, 'en_US')
+        except:
+            pass
+        pass
+    
     url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
                                             'languages': language + '|en'}))
 
@@ -164,10 +177,12 @@ def getDetail(jsonresponse, wikidata_id, language):
 
     date_of_birth = get_time(claims, 'P569', None)
     if date_of_birth is not None:
+        date_of_birth = dateutil.parser.parse(date_of_birth[8:]).strftime(locale.nl_langinfo(locale.D_FMT))
         attributes.append({'label': 'Date of birth', 'value': date_of_birth})
 
     date_of_death = get_time(claims, 'P570', None)
     if date_of_death is not None:
+        date_of_death = dateutil.parser.parse(date_of_death[8:]).strftime(locale.nl_langinfo(locale.D_FMT))
         attributes.append({'label': 'Date of death', 'value': date_of_death})
 
     if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
-- 
cgit v1.2.3


From 0059d08f13b1bf64b3f36ab2cbe89d5fec5d727c Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Mon, 15 Dec 2014 03:21:25 +0100
Subject: Rework Flickr engine. Everything was redone to use the API. It needs
 an API key, but it's worth it. Everything works: title, image, content, URL.
 The API allows lots of things. Thumbnails and dates will be easy to add once
 support for them is implemented in Searx.

Fix asciimoo/searx#126
---
 searx/engines/flickr.py | 81 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 55 insertions(+), 26 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index 4ec2841dd..8b60aed1d 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -1,54 +1,83 @@
 #!/usr/bin/env python
 
+## Flickr (Images)
+# 
+# @website     https://www.flickr.com
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) 
+# 
+# @using-api   yes
+# @results     JSON
+# @stable      yes
+# @parse       url, title, thumbnail, img_src
+#More info on api-key : https://www.flickr.com/services/apps/create/
+
 from urllib import urlencode
-#from json import loads
+from json import loads
 from urlparse import urljoin
 from lxml import html
 from time import time
 
 categories = ['images']
 
-url = 'https://secure.flickr.com/'
-search_url = url+'search/?{query}&page={page}'
-results_xpath = '//div[@class="view display-item-tile"]/figure/div'
+nb_per_page = 15
+paging = True
+api_key= None
+
+
+url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
 
 paging = True
 
+def build_flickr_url(user_id, photo_id):
+    return photo_url.format(userid=user_id,photoid=photo_id)
+
 
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'text': query}),
-                                      page=params['pageno'])
-    time_string = str(int(time())-3)
-    params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh'
-    params['cookies']['xb'] = '421409'
-    params['cookies']['localization'] = 'en-us'
-    params['cookies']['flrbp'] = time_string +\
-        '-3a8cdb85a427a33efda421fbda347b2eaf765a54'
-    params['cookies']['flrbs'] = time_string +\
-        '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776'
-    params['cookies']['flrb'] = '9'
+    params['url'] = url.format(text=urlencode({'text': query}),
+                               api_key=api_key,
+                               nb_per_page=nb_per_page,
+                               page=params['pageno'])
     return params
 
 
 def response(resp):
     results = []
-    dom = html.fromstring(resp.text)
-    for result in dom.xpath(results_xpath):
-        img = result.xpath('.//img')
+    
+    search_results = loads(resp.text)
 
-        if not img:
-            continue
+    # return empty array if there are no results
+    if not 'photos' in search_results:
+        return []
+
+    if not 'photo' in search_results['photos']:
+        return []
 
-        img = img[0]
-        img_src = 'https:'+img.attrib.get('src')
+    photos = search_results['photos']['photo']
 
-        if not img_src:
+    # parse results
+    for photo in photos:
+        if 'url_o' in photo:
+            img_src = photo['url_o']
+        elif 'url_z' in photo:
+            img_src = photo['url_z']
+        else:
             continue
 
-        href = urljoin(url, result.xpath('.//a')[0].attrib.get('href'))
-        title = img.attrib.get('alt', '')
-        results.append({'url': href,
+        url = build_flickr_url(photo['owner'], photo['id'])
+
+        title = photo['title']
+        
+        content = '<span class="photo-author">'+ photo['ownername'] +'</span><br />'
+        
+        content = content + ' <span class="description">' + photo['description']['_content'] + '</span>'
+        
+        # append result
+        results.append({'url': url,
                         'title': title,
                         'img_src': img_src,
+                        'content': content,
                         'template': 'images.html'})
+
+    # return results
     return results
-- 
cgit v1.2.3


From 930f724ec639c167d870d716240ac5d4512beba2 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Tue, 16 Dec 2014 20:40:03 +0100
Subject: Add a no-API Flickr engine. It uses the webpage's JSON data to build
 the results. Let the user choose the engine in settings.yml. No-API is active
 by default. Plus small corrections to the Flickr engine.

---
 searx/engines/flickr-noapi.py | 102 ++++++++++++++++++++++++++++++++++++++++++
 searx/engines/flickr.py       |   5 +--
 2 files changed, 103 insertions(+), 4 deletions(-)
 create mode 100644 searx/engines/flickr-noapi.py

(limited to 'searx/engines')

diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py
new file mode 100644
index 000000000..b44affec6
--- /dev/null
+++ b/searx/engines/flickr-noapi.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+## Flickr (Images)
+# 
+# @website     https://www.flickr.com
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) 
+# 
+# @using-api   no
+# @results     HTML
+# @stable      no
+# @parse       url, title, thumbnail, img_src
+
+from urllib import urlencode
+from json import loads
+from urlparse import urljoin
+from lxml import html
+import re
+
+categories = ['images']
+
+url = 'https://secure.flickr.com/'
+search_url = url+'search/?{query}&page={page}'
+photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
+regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
+
+paging = True
+
+def build_flickr_url(user_id, photo_id):
+    return photo_url.format(userid=user_id,photoid=photo_id)
+
+
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'text': query}),
+                                      page=params['pageno'])
+    return params
+
+
+def response(resp):
+    results = []
+    
+    matches = regex.search(resp.text)
+    
+    if matches == None:
+        return results
+
+    match = matches.group(1)
+    search_results = loads(match)
+    
+    if not '_data' in search_results:
+        return []
+    
+    photos = search_results['_data']
+    
+    for photo in photos:
+        
+        # In paged configuration, the first pages' photos are represented by a None object
+        if photo == None:
+            continue
+        
+        # From the biggest to the lowest format
+        if 'o' in photo['sizes']:
+            img_src = photo['sizes']['o']['displayUrl']
+        elif 'k' in photo['sizes']:
+            img_src = photo['sizes']['k']['displayUrl']
+        elif 'h' in photo['sizes']:
+            img_src = photo['sizes']['h']['displayUrl']
+        elif 'b' in photo['sizes']:
+            img_src = photo['sizes']['b']['displayUrl']
+        elif 'c' in photo['sizes']:
+            img_src = photo['sizes']['c']['displayUrl']
+        elif 'z' in photo['sizes']:
+            img_src = photo['sizes']['z']['displayUrl']
+        elif 'n' in photo['sizes']:
+            img_src = photo['sizes']['n']['displayUrl']
+        elif 'm' in photo['sizes']:
+            img_src = photo['sizes']['m']['displayUrl']
+        elif 't' in photo['sizes']:
+            img_src = photo['sizes']['to']['displayUrl']
+        elif 'q' in photo['sizes']:
+            img_src = photo['sizes']['q']['displayUrl']
+        elif 's' in photo['sizes']:
+            img_src = photo['sizes']['s']['displayUrl']
+        else:
+            continue
+        
+        url = build_flickr_url(photo['owner']['id'], photo['id'])
+
+        title = photo['title']
+        
+        content = '<span class="photo-author">'+ photo['owner']['username'] +'</span><br />'
+        
+        if 'description' in photo:
+            content = content + '<span class="description">' + photo['description'] + '</span>'
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'content': content,
+                        'template': 'images.html'})
+        
+    return results
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index 8b60aed1d..2fa5ed7ec 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -13,9 +13,6 @@
 
 from urllib import urlencode
 from json import loads
-from urlparse import urljoin
-from lxml import html
-from time import time
 
 categories = ['images']
 
@@ -70,7 +67,7 @@ def response(resp):
         
         content = '<span class="photo-author">'+ photo['ownername'] +'</span><br />'
         
-        content = content + ' <span class="description">' + photo['description']['_content'] + '</span>'
+        content = content + '<span class="description">' + photo['description']['_content'] + '</span>'
         
         # append result
         results.append({'url': url,
-- 
cgit v1.2.3


From 550232fc21ff2c3ae9a5de3d8b999de66c96171c Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Mon, 22 Dec 2014 01:00:16 +0100
Subject: SubtitleSeeker engine. Add the SubtitleSeeker engine.

---
 searx/engines/subtitleseeker.py | 59 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 searx/engines/subtitleseeker.py

(limited to 'searx/engines')

diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
new file mode 100644
index 000000000..346298300
--- /dev/null
+++ b/searx/engines/subtitleseeker.py
@@ -0,0 +1,59 @@
+## Subtitleseeker (Video)
+#
+# @website     http://www.subtitleseeker.com
+# @provide-api no
+#
+# @using-api   no
+# @results     HTML
+# @stable      no (HTML can change)
+# @parse       url, title, content
+
+from cgi import escape
+from urllib import quote_plus
+from lxml import html
+
+# engine dependent config
+categories = ['videos']
+paging = True
+
+# search-url
+url = 'http://www.subtitleseeker.com/'
+search_url = url+'search/TITLES/{query}&p={pageno}'
+
+# specific xpath variables
+results_xpath = '//div[@class="boxRows"]'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=quote_plus(query),
+                                      pageno=params['pageno'])
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        link = result.xpath(".//a")[0]
+        href = link.attrib.get('href')
+        title = escape(link.xpath(".//text()")[0])
+
+        content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
+        content = content + " - "
+        content = content + html.tostring(result.xpath('.//div[contains(@class,"grey-web")]')[0], method='text')
+
+        if result.xpath(".//span") != []:
+            content = content + " - (" + result.xpath(".//span//text()")[0].strip() + ")"
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': escape(content)})
+
+    # return results
+    return results
-- 
cgit v1.2.3


From b975418e4ce33aef530f7ad88e100d47d73e4761 Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Mon, 22 Dec 2014 14:15:59 +0100
Subject: [fix] flickr engine code cleanup ++ handle missing owner

---
 searx/engines/flickr-noapi.py | 77 ++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 45 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py
index b44affec6..522503b53 100644
--- a/searx/engines/flickr-noapi.py
+++ b/searx/engines/flickr-noapi.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
 
-## Flickr (Images)
-# 
+#  Flickr (Images)
+#
 # @website     https://www.flickr.com
-# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) 
-# 
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
+#
 # @using-api   no
 # @results     HTML
 # @stable      no
@@ -12,8 +12,6 @@
 
 from urllib import urlencode
 from json import loads
-from urlparse import urljoin
-from lxml import html
 import re
 
 categories = ['images']
@@ -22,11 +20,13 @@ url = 'https://secure.flickr.com/'
 search_url = url+'search/?{query}&page={page}'
 photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
 regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
+image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
 
 paging = True
 
+
 def build_flickr_url(user_id, photo_id):
-    return photo_url.format(userid=user_id,photoid=photo_id)
+    return photo_url.format(userid=user_id, photoid=photo_id)
 
 
 def request(query, params):
@@ -37,58 +37,45 @@ def request(query, params):
 
 def response(resp):
     results = []
-    
+
     matches = regex.search(resp.text)
-    
-    if matches == None:
+
+    if matches is None:
         return results
 
     match = matches.group(1)
     search_results = loads(match)
-    
-    if not '_data' in search_results:
+
+    if '_data' not in search_results:
         return []
-    
+
     photos = search_results['_data']
-    
+
     for photo in photos:
-        
+
         # In paged configuration, the first pages' photos are represented by a None object
-        if photo == None:
+        if photo is None:
             continue
-        
+
+        img_src = None
         # From the biggest to the lowest format
-        if 'o' in photo['sizes']:
-            img_src = photo['sizes']['o']['displayUrl']
-        elif 'k' in photo['sizes']:
-            img_src = photo['sizes']['k']['displayUrl']
-        elif 'h' in photo['sizes']:
-            img_src = photo['sizes']['h']['displayUrl']
-        elif 'b' in photo['sizes']:
-            img_src = photo['sizes']['b']['displayUrl']
-        elif 'c' in photo['sizes']:
-            img_src = photo['sizes']['c']['displayUrl']
-        elif 'z' in photo['sizes']:
-            img_src = photo['sizes']['z']['displayUrl']
-        elif 'n' in photo['sizes']:
-            img_src = photo['sizes']['n']['displayUrl']
-        elif 'm' in photo['sizes']:
-            img_src = photo['sizes']['m']['displayUrl']
-        elif 't' in photo['sizes']:
-            img_src = photo['sizes']['to']['displayUrl']
-        elif 'q' in photo['sizes']:
-            img_src = photo['sizes']['q']['displayUrl']
-        elif 's' in photo['sizes']:
-            img_src = photo['sizes']['s']['displayUrl']
-        else:
+        for image_size in image_sizes:
+            if image_size in photo['sizes']:
+                img_src = photo['sizes'][image_size]['displayUrl']
+                break
+
+        if not img_src:
+            continue
+
+        if 'id' not in photo['owner']:
             continue
-        
+
         url = build_flickr_url(photo['owner']['id'], photo['id'])
 
         title = photo['title']
-        
-        content = '<span class="photo-author">'+ photo['owner']['username'] +'</span><br />'
-        
+
+        content = '<span class="photo-author">' + photo['owner']['username'] + '</span><br />'
+
         if 'description' in photo:
             content = content + '<span class="description">' + photo['description'] + '</span>'
 
@@ -98,5 +85,5 @@ def response(resp):
                         'img_src': img_src,
                         'content': content,
                         'template': 'images.html'})
-        
+
     return results
-- 
cgit v1.2.3


From 829948b85df0510e331372bcd60cb31db9c96a5c Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Tue, 23 Dec 2014 01:41:25 +0100
Subject: Add language support. Allow the user to select a language. It must be
 written in English and capitalized, e.g. English, French, German,
 Hungarian...

---
 searx/engines/subtitleseeker.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'searx/engines')

diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 346298300..c72f81899 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -16,6 +16,8 @@ from lxml import html
 categories = ['videos']
 paging = True
 
+language = ""
+
 # search-url
 url = 'http://www.subtitleseeker.com/'
 search_url = url+'search/TITLES/{query}&p={pageno}'
@@ -41,6 +43,10 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         link = result.xpath(".//a")[0]
         href = link.attrib.get('href')
+        
+        if language is not "":
+            href = href + language + "/"
+
         title = escape(link.xpath(".//text()")[0])
 
         content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
-- 
cgit v1.2.3


From 2ea55b1c6451e77381bd88dd82f635d48ff1b6fe Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Tue, 23 Dec 2014 01:45:39 +0100
Subject: Add language support. Allow the user to select a language. It must be
 written in English and capitalized, e.g. English, French, German,
 Hungarian... (reverted from commit 829948b85df0510e331372bcd60cb31db9c96a5c)

---
 searx/engines/subtitleseeker.py | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index c72f81899..346298300 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -16,8 +16,6 @@ from lxml import html
 categories = ['videos']
 paging = True
 
-language = ""
-
 # search-url
 url = 'http://www.subtitleseeker.com/'
 search_url = url+'search/TITLES/{query}&p={pageno}'
@@ -43,10 +41,6 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         link = result.xpath(".//a")[0]
         href = link.attrib.get('href')
-        
-        if language is not "":
-            href = href + language + "/"
-
         title = escape(link.xpath(".//text()")[0])
 
         content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
-- 
cgit v1.2.3


From 10e4f6f31631fe51d16b324223525570f3e75850 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Tue, 23 Dec 2014 01:51:07 +0100
Subject: Add language support. Allow the user to select a language. It must be
 written in English and capitalized, e.g. English, French, German,
 Hungarian...

---
 searx/engines/subtitleseeker.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'searx/engines')

diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 346298300..48790a35c 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -15,6 +15,7 @@ from lxml import html
 # engine dependent config
 categories = ['videos']
 paging = True
+language = ""
 
 # search-url
 url = 'http://www.subtitleseeker.com/'
@@ -41,6 +42,10 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         link = result.xpath(".//a")[0]
         href = link.attrib.get('href')
+
+        if language is not "":
+            href = href + language + "/"
+
         title = escape(link.xpath(".//text()")[0])
 
         content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
-- 
cgit v1.2.3


From 3b3921fc593e49c12ff79df1d6b15d01fe481bec Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Wed, 24 Dec 2014 21:02:26 +0100
Subject: [enh] subtitleseeker: better language handling

---
 searx/engines/subtitleseeker.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'searx/engines')

diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 48790a35c..2f1636f59 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -11,6 +11,7 @@
 from cgi import escape
 from urllib import quote_plus
 from lxml import html
+from searx.languages import language_codes
 
 # engine dependent config
 categories = ['videos']
@@ -38,13 +39,22 @@ def response(resp):
 
     dom = html.fromstring(resp.text)
 
+    search_lang = ""
+
+    if resp.search_params['language'] != 'all':
+        search_lang = [lc[1]
+                       for lc in language_codes
+                       if lc[0][:2] == resp.search_params['language']][0]
+
     # parse results
     for result in dom.xpath(results_xpath):
         link = result.xpath(".//a")[0]
         href = link.attrib.get('href')
 
         if language is not "":
-            href = href + language + "/"
+            href = href + language + '/'
+        elif search_lang:
+            href = href + search_lang + '/'
 
         title = escape(link.xpath(".//text()")[0])
 
-- 
cgit v1.2.3


From e7e298153678fc0e77e24a3ae3b333b1230136b2 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 28 Dec 2014 22:57:59 +0100
Subject: Digg + Twitter corrections. Digg engine, with thumbnails. Add pubdate
 for Twitter.

---
 searx/engines/digg.py    | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
 searx/engines/twitter.py | 22 +++++++++++-----
 2 files changed, 82 insertions(+), 6 deletions(-)
 create mode 100644 searx/engines/digg.py

(limited to 'searx/engines')

diff --git a/searx/engines/digg.py b/searx/engines/digg.py
new file mode 100644
index 000000000..4ebfe58c1
--- /dev/null
+++ b/searx/engines/digg.py
@@ -0,0 +1,66 @@
+## Digg (News, Social media)
+#
+# @website     https://digg.com/
+# @provide-api no
+#
+# @using-api   no
+# @results     HTML (using search portal)
+# @stable      no (HTML can change)
+# @parse       url, title, content, publishedDate, thumbnail
+
+from urllib import quote_plus
+from json import loads
+from lxml import html
+from cgi import escape
+from dateutil import parser
+
+# engine dependent config
+categories = ['news', 'social media']
+paging = True
+
+# search-url
+base_url = 'https://digg.com/'
+search_url = base_url+'api/search/{query}.json?position={position}&format=html'
+
+# specific xpath variables
+results_xpath = '//article'
+link_xpath = './/small[@class="time"]//a'
+title_xpath = './/h2//a//text()'
+content_xpath = './/p//text()'
+pubdate_xpath = './/time'
+
+
+# do search-request
+def request(query, params):
+    offset = (params['pageno'] - 1) * 10
+    params['url'] = search_url.format(position=offset,
+                                      query=quote_plus(query))
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_result = loads(resp.text)
+
+    dom = html.fromstring(search_result['html'])
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        url = result.attrib.get('data-contenturl')
+        thumbnail = result.xpath('.//img')[0].attrib.get('src')
+        title = ''.join(result.xpath(title_xpath))
+        content = escape(''.join(result.xpath(content_xpath)))
+        publishedDate = parser.parse(result.xpath(pubdate_xpath)[0].attrib.get('datetime'))
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'content': content,
+                        'template': 'videos.html',
+                        'publishedDate': publishedDate,
+                        'thumbnail': thumbnail})
+
+    # return results
+    return results
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 0689150c8..5a7046c83 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -1,6 +1,6 @@
 ## Twitter (Social media)
 #
-# @website     https://www.bing.com/news
+# @website     https://twitter.com/
 # @provide-api yes (https://dev.twitter.com/docs/using-search)
 #
 # @using-api   no
@@ -14,6 +14,7 @@ from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
 from cgi import escape
+from datetime import datetime
 
 # engine dependent config
 categories = ['social media']
@@ -28,6 +29,7 @@ results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+timestamp_xpath = './/span[contains(@class,"_timestamp")]'
 
 
 # do search-request
@@ -53,11 +55,19 @@ def response(resp):
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
         content = escape(''.join(tweet.xpath(content_xpath)))
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'content': content})
+        pubdate = tweet.xpath(timestamp_xpath)
+        if len(pubdate) > 0:
+            publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content,
+                            'publishedDate': publishedDate})
+        else:
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content})
 
     # return results
     return results
-- 
cgit v1.2.3


From 5d977056f7aa216eae09a22c3baaff73546f6ff1 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Mon, 29 Dec 2014 21:31:04 +0100
Subject: Flake8 and Twitter corrections. Lots of Flake8 corrections. Maybe we
 should change the rule to allow lines of 120 chars. It seems more usable.

Big Twitter correction: now it outputs the words in the right order...
---
 searx/engines/500px.py           |  4 ++--
 searx/engines/__init__.py        |  4 ++--
 searx/engines/digg.py            |  3 ++-
 searx/engines/flickr-noapi.py    | 12 +++++++++---
 searx/engines/flickr.py          | 31 +++++++++++++++++++------------
 searx/engines/kickass.py         |  5 +++--
 searx/engines/searchcode_code.py | 18 +++++++++---------
 searx/engines/searchcode_doc.py  | 15 +++++++++++----
 searx/engines/subtitleseeker.py  |  8 ++++++--
 searx/engines/twitter.py         |  7 ++++---
 10 files changed, 67 insertions(+), 40 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/500px.py b/searx/engines/500px.py
index 5d53af32c..3b95619a1 100644
--- a/searx/engines/500px.py
+++ b/searx/engines/500px.py
@@ -35,9 +35,9 @@ def request(query, params):
 # get response from search-request
 def response(resp):
     results = []
-    
+
     dom = html.fromstring(resp.text)
-    
+
     # parse results
     for result in dom.xpath('//div[@class="photo"]'):
         link = result.xpath('.//a')[0]
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index d42339af8..9bc5cdfd4 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -81,7 +81,7 @@ def load_engine(engine_data):
         if engine_attr.startswith('_'):
             continue
         if getattr(engine, engine_attr) is None:
-            print('[E] Engine config error: Missing attribute "{0}.{1}"'\
+            print('[E] Engine config error: Missing attribute "{0}.{1}"'
                   .format(engine.name, engine_attr))
             sys.exit(1)
 
@@ -102,7 +102,7 @@ def load_engine(engine_data):
     if engine.shortcut:
         # TODO check duplications
         if engine.shortcut in engine_shortcuts:
-            print('[E] Engine config error: ambigious shortcut: {0}'\
+            print('[E] Engine config error: ambigious shortcut: {0}'
                   .format(engine.shortcut))
             sys.exit(1)
         engine_shortcuts[engine.shortcut] = engine.name
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 4ebfe58c1..241234fdb 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -52,7 +52,8 @@ def response(resp):
         thumbnail = result.xpath('.//img')[0].attrib.get('src')
         title = ''.join(result.xpath(title_xpath))
         content = escape(''.join(result.xpath(content_xpath)))
-        publishedDate = parser.parse(result.xpath(pubdate_xpath)[0].attrib.get('datetime'))
+        pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
+        publishedDate = parser.parse(pubdate)
 
         # append result
         results.append({'url': url,
diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py
index 522503b53..f90903647 100644
--- a/searx/engines/flickr-noapi.py
+++ b/searx/engines/flickr-noapi.py
@@ -53,7 +53,8 @@ def response(resp):
 
     for photo in photos:
 
-        # In paged configuration, the first pages' photos are represented by a None object
+        # In paged configuration, the first pages' photos
+        # are represented by a None object
         if photo is None:
             continue
 
@@ -74,10 +75,15 @@ def response(resp):
 
         title = photo['title']
 
-        content = '<span class="photo-author">' + photo['owner']['username'] + '</span><br />'
+        content = '<span class="photo-author">' +\
+                  photo['owner']['username'] +\
+                  '</span><br />'
 
         if 'description' in photo:
-            content = content + '<span class="description">' + photo['description'] + '</span>'
+            content = content +\
+                      '<span class="description">' +\
+                      photo['description'] +\
+                      '</span>'
 
         # append result
         results.append({'url': url,
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index 2fa5ed7ec..4dadd80a6 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
 
 ## Flickr (Images)
-# 
+#
 # @website     https://www.flickr.com
-# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) 
-# 
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
+#
 # @using-api   yes
 # @results     JSON
 # @stable      yes
@@ -18,16 +18,20 @@ categories = ['images']
 
 nb_per_page = 15
 paging = True
-api_key= None
+api_key = None
 
 
-url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
+      '&api_key={api_key}&{text}&sort=relevance' +\
+      '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\
+      '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
 photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
 
 paging = True
 
+
 def build_flickr_url(user_id, photo_id):
-    return photo_url.format(userid=user_id,photoid=photo_id)
+    return photo_url.format(userid=user_id, photoid=photo_id)
 
 
 def request(query, params):
@@ -40,7 +44,7 @@ def request(query, params):
 
 def response(resp):
     results = []
-    
+
     search_results = loads(resp.text)
 
     # return empty array if there are no results
@@ -64,11 +68,14 @@ def response(resp):
         url = build_flickr_url(photo['owner'], photo['id'])
 
         title = photo['title']
-        
-        content = '<span class="photo-author">'+ photo['ownername'] +'</span><br />'
-        
-        content = content + '<span class="description">' + photo['description']['_content'] + '</span>'
-        
+
+        content = '<span class="photo-author">' +\
+                  photo['ownername'] +\
+                  '</span><br />' +\
+                  '<span class="description">' +\
+                  photo['description']['_content'] +\
+                  '</span>'
+
         # append result
         results.append({'url': url,
                         'title': title,
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index f1fcd9e1a..16e9d6de6 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -24,7 +24,7 @@ search_url = url + 'search/{search_term}/{pageno}/'
 
 # specific xpath variables
 magnet_xpath = './/a[@title="Torrent magnet link"]'
-#content_xpath = './/font[@class="detDesc"]//text()'
+content_xpath = './/span[@class="font11px lightgrey block"]'
 
 
 # do search-request
@@ -56,7 +56,8 @@ def response(resp):
         link = result.xpath('.//a[@class="cellMainLink"]')[0]
         href = urljoin(url, link.attrib['href'])
         title = ' '.join(link.xpath('.//text()'))
-        content = escape(html.tostring(result.xpath('.//span[@class="font11px lightgrey block"]')[0], method="text"))
+        content = escape(html.tostring(result.xpath(content_xpath)[0],
+                                       method="text"))
         seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
         leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
 
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index 2ba0e52f1..0f98352c1 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -11,7 +11,6 @@
 from urllib import urlencode
 from json import loads
 import cgi
-import re
 
 # engine dependent config
 categories = ['it']
@@ -33,7 +32,7 @@ def request(query, params):
 # get response from search-request
 def response(resp):
     results = []
-    
+
     search_results = loads(resp.text)
 
     # parse results
@@ -41,21 +40,22 @@ def response(resp):
         href = result['url']
         title = "" + result['name'] + " - " + result['filename']
         content = result['repo'] + "<br />"
-        
+
         lines = dict()
         for line, code in result['lines'].items():
             lines[int(line)] = code
 
         content = content + '<pre class="code-formatter"><table class="code">'
         for line, code in sorted(lines.items()):
-            content = content + '<tr><td class="line-number" style="padding-right:5px;">' 
-            content = content + str(line) + '</td><td class="code-snippet">' 
-            # Replace every two spaces with ' &nbps;' to keep formatting while allowing the browser to break the line if necessary
-            content = content + cgi.escape(code).replace('\t', '    ').replace('  ', '&nbsp; ').replace('  ', ' &nbsp;') 
+            content = content + '<tr><td class="line-number" style="padding-right:5px;">'
+            content = content + str(line) + '</td><td class="code-snippet">'
+            # Replace every two spaces with ' &nbps;' to keep formatting
+            # while allowing the browser to break the line if necessary
+            content = content + cgi.escape(code).replace('\t', '    ').replace('  ', '&nbsp; ').replace('  ', ' &nbsp;')
             content = content + "</td></tr>"
-            
+
         content = content + "</table></pre>"
-        
+
         # append result
         results.append({'url': href,
                         'title': title,
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
index e07cbeab9..b5b7159be 100644
--- a/searx/engines/searchcode_doc.py
+++ b/searx/engines/searchcode_doc.py
@@ -31,15 +31,22 @@ def request(query, params):
 # get response from search-request
 def response(resp):
     results = []
-    
+
     search_results = loads(resp.text)
 
     # parse results
     for result in search_results['results']:
         href = result['url']
-        title = "[" + result['type'] + "] " + result['namespace'] + " " + result['name']
-        content = '<span class="highlight">[' + result['type'] + "] " + result['name'] + " " + result['synopsis'] + "</span><br />" + result['description']
-        
+        title = "[" + result['type'] + "] " +\
+                result['namespace'] +\
+                " " + result['name']
+        content = '<span class="highlight">[' +\
+                  result['type'] + "] " +\
+                  result['name'] + " " +\
+                  result['synopsis'] +\
+                  "</span><br />" +\
+                  result['description']
+
         # append result
         results.append({'url': href,
                         'title': title,
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 2f1636f59..c413dcf26 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -60,10 +60,14 @@ def response(resp):
 
         content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
         content = content + " - "
-        content = content + html.tostring(result.xpath('.//div[contains(@class,"grey-web")]')[0], method='text')
+        text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
+        content = content + html.tostring(text, method='text')
 
         if result.xpath(".//span") != []:
-            content = content + " - (" + result.xpath(".//span//text()")[0].strip() + ")"
+            content = content +\
+                      " - (" +\
+                      result.xpath(".//span//text()")[0].strip() +\
+                      ")"
 
         # append result
         results.append({'url': href,
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 5a7046c83..bd9a8c2fc 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -28,7 +28,7 @@ search_url = base_url+'search?'
 results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
-content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]'
 timestamp_xpath = './/span[contains(@class,"_timestamp")]'
 
 
@@ -54,10 +54,11 @@ def response(resp):
         link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
-        content = escape(''.join(tweet.xpath(content_xpath)))
+        content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
         pubdate = tweet.xpath(timestamp_xpath)
         if len(pubdate) > 0:
-            publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
+            timestamp = float(pubdate[0].attrib.get('data-time'))
+            publishedDate = datetime.fromtimestamp(timestamp, None)
             # append result
             results.append({'url': url,
                             'title': title,
-- 
cgit v1.2.3


From 2181c4384ed4d41c795799a345974269327bf641 Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Thu, 1 Jan 2015 14:14:56 +0100
Subject: [mod] purge local html_to_text

---
 searx/engines/duckduckgo_definitions.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 8f81d2c8e..b66d6c0f2 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -1,6 +1,7 @@
 import json
 from urllib import urlencode
 from lxml import html
+from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
 
 url = 'https://api.duckduckgo.com/'\
@@ -17,11 +18,6 @@ def result_to_text(url, text, htmlResult):
         return text
 
 
-def html_to_text(htmlFragment):
-    dom = html.fromstring(htmlFragment)
-    return extract_text(dom)
-
-
 def request(query, params):
     # TODO add kl={locale}
     params['url'] = url.format(query=urlencode({'q': query}))
-- 
cgit v1.2.3


From cc4e17b6686dbefe0d57862e045f98f72a4e58fc Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Fri, 2 Jan 2015 12:33:40 +0100
Subject: [fix] pep8

---
 searx/engines/flickr-noapi.py   | 6 +++---
 searx/engines/subtitleseeker.py | 6 +++---
 searx/engines/wikidata.py       | 5 ++---
 3 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py
index f90903647..aa2fa5d3b 100644
--- a/searx/engines/flickr-noapi.py
+++ b/searx/engines/flickr-noapi.py
@@ -81,9 +81,9 @@ def response(resp):
 
         if 'description' in photo:
             content = content +\
-                      '<span class="description">' +\
-                      photo['description'] +\
-                      '</span>'
+                '<span class="description">' +\
+                photo['description'] +\
+                '</span>'
 
         # append result
         results.append({'url': url,
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index c413dcf26..9aaf1947b 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -65,9 +65,9 @@ def response(resp):
 
         if result.xpath(".//span") != []:
             content = content +\
-                      " - (" +\
-                      result.xpath(".//span//text()")[0].strip() +\
-                      ")"
+                " - (" +\
+                result.xpath(".//span//text()")[0].strip() +\
+                ")"
 
         # append result
         results.append({'url': href,
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index bda80cdca..df976ae35 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -2,7 +2,6 @@ import json
 from requests import get
 from urllib import urlencode
 import locale
-import time
 import dateutil.parser
 
 result_count = 1
@@ -38,7 +37,7 @@ def response(resp):
     language = resp.search_params['language'].split('_')[0]
     if language == 'all':
         language = 'en'
-    
+
     try:
         locale.setlocale(locale.LC_ALL, str(resp.search_params['language']))
     except:
@@ -47,7 +46,7 @@ def response(resp):
         except:
             pass
         pass
-    
+
     url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
                                             'languages': language + '|en'}))
 
-- 
cgit v1.2.3


From 4450ed5503ab9f7b4d0dc1849837523bbe3b56dd Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sat, 3 Jan 2015 23:54:02 +0100
Subject: Digg correction: return no result instead of crashing if there is no result

---
 searx/engines/digg.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'searx/engines')

diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 241234fdb..8c457d6b9 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -44,6 +44,9 @@ def response(resp):
 
     search_result = loads(resp.text)
 
+    if search_result['html'] == '':
+        return results
+
     dom = html.fromstring(search_result['html'])
 
     # parse results
-- 
cgit v1.2.3


From 3aa3a4633f50fa50693636113a4141e266db90d7 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sat, 3 Jan 2015 23:55:50 +0100
Subject: Few fixes on Vimeo: change URL from https to http; change way of
 handling text xpath

---
 searx/engines/vimeo.py | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index c66c4148a..3949a7299 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -13,24 +13,23 @@
 # @todo        set content-parameter with correct data
 
 from urllib import urlencode
-from HTMLParser import HTMLParser
 from lxml import html
-from searx.engines.xpath import extract_text
 from dateutil import parser
+from cgi import escape
 
 # engine dependent config
 categories = ['videos']
 paging = True
 
 # search-url
-base_url = 'https://vimeo.com'
+base_url = 'http://vimeo.com'
 search_url = base_url + '/search/page:{pageno}?{query}'
 
 # specific xpath variables
+results_xpath = '//div[@id="browse_content"]/ol/li'
 url_xpath = './a/@href'
+title_xpath = './a/div[@class="data"]/p[@class="title"]'
 content_xpath = './a/img/@src'
-title_xpath = './a/div[@class="data"]/p[@class="title"]/text()'
-results_xpath = '//div[@id="browse_content"]/ol/li'
 publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
 
 
@@ -39,10 +38,6 @@ def request(query, params):
     params['url'] = search_url.format(pageno=params['pageno'],
                                       query=urlencode({'q': query}))
 
-    # TODO required?
-    params['cookies']['__utma'] =\
-        '00000000.000#0000000.0000000000.0000000000.0000000000.0'
-
     return params
 
 
@@ -52,15 +47,12 @@ def response(resp):
 
     dom = html.fromstring(resp.text)
 
-    p = HTMLParser()
-
     # parse results
     for result in dom.xpath(results_xpath):
         url = base_url + result.xpath(url_xpath)[0]
-        title = p.unescape(extract_text(result.xpath(title_xpath)))
-        thumbnail = extract_text(result.xpath(content_xpath)[0])
-        publishedDate = parser.parse(extract_text(
-            result.xpath(publishedDate_xpath)[0]))
+        title = escape(html.tostring(result.xpath(title_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
+        thumbnail = result.xpath(content_xpath)[0]
+        publishedDate = parser.parse(result.xpath(publishedDate_xpath)[0])
 
         # append result
         results.append({'url': url,
-- 
cgit v1.2.3


From 4a195e0b28fdd940e046c442032c816095416fec Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Mon, 5 Jan 2015 02:04:23 +0100
Subject: Integrated media in results + Deezer engine. New "embedded" item for
 the results, which lets an engine provide an iframe to display the media
 directly in the results. Note that the src attribute of the iframes is not
 set; data-src is set instead, so the iframe is only loaded when clicked.

Deezer engine based on public API (no key).
---
 searx/engines/dailymotion.py | 15 ++++++++---
 searx/engines/deezer.py      | 62 ++++++++++++++++++++++++++++++++++++++++++++
 searx/engines/soundcloud.py  | 14 ++++++++--
 searx/engines/vimeo.py       | 11 ++++++--
 searx/engines/youtube.py     | 13 ++++++++--
 5 files changed, 105 insertions(+), 10 deletions(-)
 create mode 100644 searx/engines/deezer.py

(limited to 'searx/engines')

diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index a5bffa866..03b1dbb8b 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -6,12 +6,14 @@
 # @using-api   yes
 # @results     JSON
 # @stable      yes
-# @parse       url, title, thumbnail
+# @parse       url, title, thumbnail, publishedDate, embedded
 #
 # @todo        set content-parameter with correct data
 
 from urllib import urlencode
 from json import loads
+from cgi import escape
+from datetime import datetime
 
 # engine dependent config
 categories = ['videos']
@@ -20,7 +22,9 @@ language_support = True
 
 # search-url
 # see http://www.dailymotion.com/doc/api/obj-video.html
-search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=5&page={pageno}&{query}'  # noqa
+search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}'  # noqa
+embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
+    'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
 
 
 # do search-request
@@ -51,14 +55,17 @@ def response(resp):
     for res in search_res['list']:
         title = res['title']
         url = res['url']
-        #content = res['description']
-        content = ''
+        content = escape(res['description'])
         thumbnail = res['thumbnail_360_url']
+        publishedDate = datetime.fromtimestamp(res['created_time'], None)
+        embedded = embedded_url.format(videoid=res['id'])
 
         results.append({'template': 'videos.html',
                         'url': url,
                         'title': title,
                         'content': content,
+                        'publishedDate': publishedDate,
+                        'embedded': embedded,
                         'thumbnail': thumbnail})
 
     # return results
diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py
new file mode 100644
index 000000000..6c26b6aeb
--- /dev/null
+++ b/searx/engines/deezer.py
@@ -0,0 +1,62 @@
+## Deezer (Music)
+#
+# @website     https://deezer.com
+# @provide-api yes (http://developers.deezer.com/api/)
+#
+# @using-api   yes
+# @results     JSON
+# @stable      yes
+# @parse       url, title, content, embedded
+
+from json import loads
+from urllib import urlencode
+
+# engine dependent config
+categories = ['music']
+paging = True
+
+# search-url
+url = 'http://api.deezer.com/'
+search_url = url + 'search?{query}&index={offset}'
+
+embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
+    'data-src="http://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' +\
+    'width="540" height="80"></iframe>'
+
+
+# do search-request
+def request(query, params):
+    offset = (params['pageno'] - 1) * 25
+
+    params['url'] = search_url.format(query=urlencode({'q': query}),
+                                      offset=offset)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_res = loads(resp.text)
+
+    # parse results
+    for result in search_res.get('data', []):
+        if result['type'] == 'track':
+            print result
+            title = result['title']
+            url = result['link']
+            content = result['artist']['name'] +\
+                " &bull; " +\
+                result['album']['title'] +\
+                " &bull; " + result['title']
+            embedded = embedded_url.format(audioid=result['id'])
+
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'embedded': embedded,
+                            'content': content})
+
+    # return results
+    return results
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index 164a569a3..44374af6f 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -6,10 +6,11 @@
 # @using-api   yes
 # @results     JSON
 # @stable      yes
-# @parse       url, title, content
+# @parse       url, title, content, publishedDate, embedded
 
 from json import loads
-from urllib import urlencode
+from urllib import urlencode, quote_plus
+from dateutil import parser
 
 # engine dependent config
 categories = ['music']
@@ -27,6 +28,10 @@ search_url = url + 'search?{query}'\
                          '&linked_partitioning=1'\
                          '&client_id={client_id}'   # noqa
 
+embedded_url = '<iframe width="100%" height="166" ' +\
+    'scrolling="no" frameborder="no" ' +\
+    'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
+
 
 # do search-request
 def request(query, params):
@@ -50,10 +55,15 @@ def response(resp):
         if result['kind'] in ('track', 'playlist'):
             title = result['title']
             content = result['description']
+            publishedDate = parser.parse(result['last_modified'])
+            uri = quote_plus(result['uri'])
+            embedded = embedded_url.format(uri=uri)
 
             # append result
             results.append({'url': result['permalink_url'],
                             'title': title,
+                            'publishedDate': publishedDate,
+                            'embedded': embedded,
                             'content': content})
 
     # return results
diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index c66c4148a..fd945b319 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -7,7 +7,7 @@
 # @using-api   no (TODO, rewrite to api)
 # @results     HTML (using search portal)
 # @stable      no (HTML can change)
-# @parse       url, title, publishedDate,  thumbnail
+# @parse       url, title, publishedDate,  thumbnail, embedded
 #
 # @todo        rewrite to api
 # @todo        set content-parameter with correct data
@@ -33,6 +33,10 @@ title_xpath = './a/div[@class="data"]/p[@class="title"]/text()'
 results_xpath = '//div[@id="browse_content"]/ol/li'
 publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
 
+embedded_url = '<iframe data-src="//player.vimeo.com/video{videoid}" ' +\
+    'width="540" height="304" frameborder="0" ' +\
+    'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
+
 
 # do search-request
 def request(query, params):
@@ -56,11 +60,13 @@ def response(resp):
 
     # parse results
     for result in dom.xpath(results_xpath):
-        url = base_url + result.xpath(url_xpath)[0]
+        videoid = result.xpath(url_xpath)[0]
+        url = base_url + videoid
         title = p.unescape(extract_text(result.xpath(title_xpath)))
         thumbnail = extract_text(result.xpath(content_xpath)[0])
         publishedDate = parser.parse(extract_text(
             result.xpath(publishedDate_xpath)[0]))
+        embedded = embedded_url.format(videoid=videoid)
 
         # append result
         results.append({'url': url,
@@ -68,6 +74,7 @@ def response(resp):
                         'content': '',
                         'template': 'videos.html',
                         'publishedDate': publishedDate,
+                        'embedded': embedded,
                         'thumbnail': thumbnail})
 
     # return results
diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py
index 973e799f8..59f07c574 100644
--- a/searx/engines/youtube.py
+++ b/searx/engines/youtube.py
@@ -6,7 +6,7 @@
 # @using-api   yes
 # @results     JSON
 # @stable      yes
-# @parse       url, title, content, publishedDate, thumbnail
+# @parse       url, title, content, publishedDate, thumbnail, embedded
 
 from json import loads
 from urllib import urlencode
@@ -19,7 +19,11 @@ language_support = True
 
 # search-url
 base_url = 'https://gdata.youtube.com/feeds/api/videos'
-search_url = base_url + '?alt=json&{query}&start-index={index}&max-results=5'  # noqa
+search_url = base_url + '?alt=json&{query}&start-index={index}&max-results=5'
+
+embedded_url = '<iframe width="540" height="304" ' +\
+    'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
+    'frameborder="0" allowfullscreen></iframe>'
 
 
 # do search-request
@@ -60,6 +64,8 @@ def response(resp):
         if url.endswith('&'):
             url = url[:-1]
 
+        videoid = url[32:]
+
         title = result['title']['$t']
         content = ''
         thumbnail = ''
@@ -72,12 +78,15 @@ def response(resp):
 
         content = result['content']['$t']
 
+        embedded = embedded_url.format(videoid=videoid)
+
         # append result
         results.append({'url': url,
                         'title': title,
                         'content': content,
                         'template': 'videos.html',
                         'publishedDate': publishedDate,
+                        'embedded': embedded,
                         'thumbnail': thumbnail})
 
     # return results
-- 
cgit v1.2.3


From 0ca04be55dec06c5ef737febb128d3dc36c3b5d7 Mon Sep 17 00:00:00 2001
From: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Mon, 5 Jan 2015 20:24:33 +0100
Subject: Remove print

---
 searx/engines/deezer.py | 1 -
 1 file changed, 1 deletion(-)

(limited to 'searx/engines')

diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py
index 6c26b6aeb..433ceffa1 100644
--- a/searx/engines/deezer.py
+++ b/searx/engines/deezer.py
@@ -43,7 +43,6 @@ def response(resp):
     # parse results
     for result in search_res.get('data', []):
         if result['type'] == 'track':
-            print result
             title = result['title']
             url = result['link']
             content = result['artist']['name'] +\
-- 
cgit v1.2.3


From 299a80a1eb2eecb80f5c50da261a9eab1900b572 Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Fri, 9 Jan 2015 04:13:05 +0100
Subject: [enh] using the logger

---
 searx/engines/__init__.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'searx/engines')

diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 9bc5cdfd4..643b107a5 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -22,6 +22,10 @@ from imp import load_source
 from flask.ext.babel import gettext
 from operator import itemgetter
 from searx import settings
+from searx import logger
+
+
+logger = logger.getChild('engines')
 
 engine_dir = dirname(realpath(__file__))
 
@@ -81,7 +85,7 @@ def load_engine(engine_data):
         if engine_attr.startswith('_'):
             continue
         if getattr(engine, engine_attr) is None:
-            print('[E] Engine config error: Missing attribute "{0}.{1}"'
+            logger.error('Missing engine config attribute: "{0}.{1}"'
                   .format(engine.name, engine_attr))
             sys.exit(1)
 
@@ -100,9 +104,8 @@ def load_engine(engine_data):
         categories['general'].append(engine)
 
     if engine.shortcut:
-        # TODO check duplications
         if engine.shortcut in engine_shortcuts:
-            print('[E] Engine config error: ambigious shortcut: {0}'
+            logger.error('Engine config error: ambigious shortcut: {0}'
                   .format(engine.shortcut))
             sys.exit(1)
         engine_shortcuts[engine.shortcut] = engine.name
@@ -199,7 +202,7 @@ def get_engines_stats():
 
 
 if 'engines' not in settings or not settings['engines']:
-    print '[E] Error no engines found. Edit your settings.yml'
+    logger.error('No engines found. Edit your settings.yml')
     exit(2)
 
 for engine_data in settings['engines']:
-- 
cgit v1.2.3


From c8be128e97479ea6c871c4b6fbf014fa8136e708 Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Fri, 9 Jan 2015 11:21:46 +0100
Subject: [mod] ignore startpage unicode errors

---
 searx/engines/startpage.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'searx/engines')

diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 16da728cd..70b193952 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -66,7 +66,10 @@ def response(resp):
             continue
         link = links[0]
         url = link.attrib.get('href')
-        title = escape(link.text_content())
+        try:
+            title = escape(link.text_content())
+        except UnicodeDecodeError:
+            continue
 
         # block google-ad url's
         if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
-- 
cgit v1.2.3