summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/__init__.py 6
-rw-r--r-- searx/engines/flickr_noapi.py 11
-rw-r--r-- searx/engines/www1x.py 82
-rw-r--r-- searx/engines/yacy.py 25
4 files changed, 105 insertions, 19 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 643b107a5..21a307501 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -69,17 +69,17 @@ def load_engine(engine_data):
engine.categories = ['general']
if not hasattr(engine, 'language_support'):
- # engine.language_support = False
engine.language_support = True
if not hasattr(engine, 'timeout'):
- # engine.language_support = False
engine.timeout = settings['server']['request_timeout']
if not hasattr(engine, 'shortcut'):
- # engine.shortcut = '''
engine.shortcut = ''
+ if not hasattr(engine, 'disabled'):
+ engine.disabled = False
+
# checking required variables
for engine_attr in dir(engine):
if engine_attr.startswith('_'):
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
index 73dff44c4..3a83fdc65 100644
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -13,6 +13,10 @@
from urllib import urlencode
from json import loads
import re
+from searx.engines import logger
+
+
+logger = logger.getChild('flickr-noapi')
categories = ['images']
@@ -62,10 +66,11 @@ def response(resp):
# From the biggest to the lowest format
for image_size in image_sizes:
if image_size in photo['sizes']:
- img_src = photo['sizes'][image_size]['displayUrl']
+ img_src = photo['sizes'][image_size]['url']
break
if not img_src:
+ logger.debug('cannot find valid image size: {0}'.format(repr(photo)))
continue
if 'id' not in photo['owner']:
@@ -73,9 +78,9 @@ def response(resp):
# For a bigger thumbnail, keep only the url_z, not the url_n
if 'n' in photo['sizes']:
- thumbnail_src = photo['sizes']['n']['displayUrl']
+ thumbnail_src = photo['sizes']['n']['url']
elif 'z' in photo['sizes']:
- thumbnail_src = photo['sizes']['z']['displayUrl']
+ thumbnail_src = photo['sizes']['z']['url']
else:
thumbnail_src = img_src
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
new file mode 100644
index 000000000..a68c105ce
--- /dev/null
+++ b/searx/engines/www1x.py
@@ -0,0 +1,82 @@
+## 1x (Images)
+#
+# @website http://1x.com/
+# @provide-api no
+#
+# @using-api no
+# @results HTML
+# @stable no (HTML can change)
+# @parse url, title, thumbnail, img_src, content
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+import string
+import re
+
+# engine dependent config
+categories = ['images']
+paging = False
+
+# search-url
+base_url = 'http://1x.com'
+search_url = base_url+'/backend/search.php?{query}'
+
+
+# do search-request
+def request(query, params):
+ params['url'] = search_url.format(query=urlencode({'q': query}))
+
+ return params
+
+
+# get response from search-request
+def response(resp):
+ results = []
+
+ # get links from result-text
+ regex = re.compile('(</a>|<a)')
+ results_parts = re.split(regex, resp.text)
+
+ cur_element = ''
+
+ # iterate over link parts
+ for result_part in results_parts:
+ # process start and end of link
+ if result_part == '<a':
+ cur_element = result_part
+ continue
+ elif result_part != '</a>':
+ cur_element += result_part
+ continue
+
+ cur_element += result_part
+
+ # fix xml-error: self-close the tag that directly precedes </a>
+ cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+
+ dom = html.fromstring(cur_element)
+ link = dom.xpath('//a')[0]
+
+ url = urljoin(base_url, link.attrib.get('href'))
+ title = link.attrib.get('title', '')
+
+ thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
+ # TODO: get image with higher resolution
+ img_src = thumbnail_src
+
+ # check if url is pointing to a photo
+ if '/photo/' not in url:
+ continue
+
+ # append result
+ results.append({'url': url,
+ 'title': title,
+ 'img_src': img_src,
+ 'content': '',
+ 'thumbnail_src': thumbnail_src,
+ 'template': 'images.html'})
+
+ # return results
+ return results
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 4c4fac7df..17e2a7aab 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -68,9 +68,18 @@ def response(resp):
search_results = raw_search_results.get('channels', {})[0].get('items', [])
- if resp.search_params['category'] == 'general':
+ for result in search_results:
+ # parse image results
+ if result.get('image'):
+ # append result
+ results.append({'url': result['url'],
+ 'title': result['title'],
+ 'content': '',
+ 'img_src': result['image'],
+ 'template': 'images.html'})
+
# parse general results
- for result in search_results:
+ else:
publishedDate = parser.parse(result['pubDate'])
# append result
@@ -79,17 +88,7 @@ def response(resp):
'content': result['description'],
'publishedDate': publishedDate})
- elif resp.search_params['category'] == 'images':
- # parse image results
- for result in search_results:
- # append result
- results.append({'url': result['url'],
- 'title': result['title'],
- 'content': '',
- 'img_src': result['image'],
- 'template': 'images.html'})
-
- #TODO parse video, audio and file results
+ #TODO parse video, audio and file results
# return results
return results