diff options
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/__init__.py | 6 | ||||
| -rw-r--r-- | searx/engines/flickr_noapi.py | 11 | ||||
| -rw-r--r-- | searx/engines/www1x.py | 82 | ||||
| -rw-r--r-- | searx/engines/yacy.py | 25 |
4 files changed, 105 insertions, 19 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 643b107a5..21a307501 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -69,17 +69,17 @@ def load_engine(engine_data): engine.categories = ['general'] if not hasattr(engine, 'language_support'): - # engine.language_support = False engine.language_support = True if not hasattr(engine, 'timeout'): - # engine.language_support = False engine.timeout = settings['server']['request_timeout'] if not hasattr(engine, 'shortcut'): - # engine.shortcut = ''' engine.shortcut = '' + if not hasattr(engine, 'disabled'): + engine.disabled = False + # checking required variables for engine_attr in dir(engine): if engine_attr.startswith('_'): diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 73dff44c4..3a83fdc65 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -13,6 +13,10 @@ from urllib import urlencode from json import loads import re +from searx.engines import logger + + +logger = logger.getChild('flickr-noapi') categories = ['images'] @@ -62,10 +66,11 @@ def response(resp): # From the biggest to the lowest format for image_size in image_sizes: if image_size in photo['sizes']: - img_src = photo['sizes'][image_size]['displayUrl'] + img_src = photo['sizes'][image_size]['url'] break if not img_src: + logger.debug('cannot find valid image size: {0}'.format(repr(photo))) continue if 'id' not in photo['owner']: @@ -73,9 +78,9 @@ def response(resp): # For a bigger thumbnail, keep only the url_z, not the url_n if 'n' in photo['sizes']: - thumbnail_src = photo['sizes']['n']['displayUrl'] + thumbnail_src = photo['sizes']['n']['url'] elif 'z' in photo['sizes']: - thumbnail_src = photo['sizes']['z']['displayUrl'] + thumbnail_src = photo['sizes']['z']['url'] else: thumbnail_src = img_src diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py new file mode 100644 index 000000000..a68c105ce --- /dev/null +++ b/searx/engines/www1x.py @@ -0,0 +1,82 @@ +## 1x (Images) +# +# @website http://1x.com/ +# @provide-api no +# +# @using-api no +# @results HTML +# @stable no (HTML can change) +# @parse url, title, thumbnail, img_src, content + + +from urllib import urlencode +from urlparse import urljoin +from lxml import html +import string +import re + +# engine dependent config +categories = ['images'] +paging = False + +# search-url +base_url = 'http://1x.com' +search_url = base_url+'/backend/search.php?{query}' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'q': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + # get links from result-text + regex = re.compile('(</a>|<a)') + results_parts = re.split(regex, resp.text) + + cur_element = '' + + # iterate over link parts + for result_part in results_parts: + # processed start and end of link + if result_part == '<a': + cur_element = result_part + continue + elif result_part != '</a>': + cur_element += result_part + continue + + cur_element += result_part + + # fix xml-error + cur_element = string.replace(cur_element, '"></a>', '"/></a>') + + dom = html.fromstring(cur_element) + link = dom.xpath('//a')[0] + + url = urljoin(base_url, link.attrib.get('href')) + title = link.attrib.get('title', '') + + thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src']) + # TODO: get image with higher resolution + img_src = thumbnail_src + + # check if url is showing to a photo + if '/photo/' not in url: + continue + + # append result + results.append({'url': url, + 'title': title, + 'img_src': img_src, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'template': 'images.html'}) + + # return results + return results diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 4c4fac7df..17e2a7aab 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -68,9 +68,18 @@ def response(resp): search_results = raw_search_results.get('channels', {})[0].get('items', []) - if resp.search_params['category'] == 'general': + for result in search_results: + # parse image results + if result.get('image'): + # append result + results.append({'url': result['url'], + 'title': result['title'], + 'content': '', + 'img_src': result['image'], + 'template': 'images.html'}) + # parse general results - for result in search_results: + else: publishedDate = parser.parse(result['pubDate']) # append result @@ -79,17 +88,7 @@ def response(resp): 'content': result['description'], 'publishedDate': publishedDate}) - elif resp.search_params['category'] == 'images': - # parse image results - for result in search_results: - # append result - results.append({'url': result['url'], - 'title': result['title'], - 'content': '', - 'img_src': result['image'], - 'template': 'images.html'}) - - #TODO parse video, audio and file results + #TODO parse video, audio and file results # return results return results |