diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2018-03-22 14:41:42 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-03-22 14:41:42 +0100 |
| commit | 0cb55ddfde3a2687ca7a647ac95ffe484e12471b (patch) | |
| tree | b21dd9ad903afe756ce43ce996e1806e26cbd2c1 /searx/engines | |
| parent | b9d4c0523e8d6eab81658d77f8213b39f9b28f17 (diff) | |
| parent | 8cc529e9a3976e48676676600379ce43f690dd8c (diff) | |
Merge pull request #1136 from kvch/add-findx-general
Add findx engine
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/findx.py | 115 |
1 files changed, 115 insertions, 0 deletions
# File: searx/engines/findx.py
"""
FindX (General, Images, Videos)

@website     https://www.findx.com
@provide-api no
@using-api   no
@results     HTML
@stable      no
@parse       url, title, content, embedded, img_src, thumbnail_src
"""

from dateutil import parser
from json import loads
import re

from lxml import html

from searx import logger
from searx.engines.xpath import extract_text
from searx.engines.youtube_noapi import base_youtube_url, embedded_url
from searx.url_utils import urlencode


# Engine settings.
paging = True
# The result data is read from a JSON blob inside this <script> element.
results_xpath = '//script[@id="initial-state"]'
search_url = 'https://www.findx.com/{category}?{q}'
# Maps the category found in params['category'] to the FindX URL path segment.
type_map = {
    'none': 'web',
    'general': 'web',
    'images': 'images',
    'videos': 'videos',
}


def request(query, params):
    """Fill ``params['url']`` with the FindX search URL for ``query``.

    Reads ``params['category']`` (must be a key of ``type_map``) and
    ``params['pageno']``; returns the same ``params`` dict.
    """
    query_string = urlencode({
        'q': query,
        'page': params['pageno'],
    })
    params['url'] = search_url.format(
        category=type_map[params['category']],
        q=query_string,
    )
    return params


def response(resp):
    """Parse a FindX result page into a list of result dicts.

    The page's ``initial-state`` script is decoded as JSON and the first
    non-empty section among ``web``, ``images`` and ``video`` is converted.
    Returns an empty list when none of the sections holds results.
    """
    dom = html.fromstring(resp.text)
    results_raw_json = dom.xpath(results_xpath)
    results_json = loads(extract_text(results_raw_json))

    # Same precedence as before (web, then images, then video), but a section
    # that is absent from the payload is skipped instead of raising KeyError.
    section_parsers = (
        ('web', _general_results),
        ('images', _images_results),
        ('video', _videos_results),
    )
    for section, parse_section in section_parsers:
        section_results = (results_json.get(section) or {}).get('results')
        if section_results:
            return parse_section(section_results)

    return []


def _general_results(general_results):
    """Convert FindX web results into url/title/content result dicts."""
    return [
        {
            'url': result['url'],
            'title': result['title'],
            'content': result['sum'],
        }
        for result in general_results
    ]


def _images_results(image_results):
    """Convert FindX image results into dicts for the ``images.html`` template."""
    return [
        {
            'url': result['sourceURL'],
            'title': result['title'],
            'content': result['source'],
            'thumbnail_src': _extract_url(result['assets']['thumb']['url']),
            'img_src': _extract_url(result['assets']['file']['url']),
            'template': 'images.html',
        }
        for result in image_results
    ]


def _videos_results(video_results):
    """Convert FindX video results into dicts for the ``videos.html`` template.

    Only results whose ``kind`` starts with ``youtube`` are converted; any
    other kind is logged and skipped. Descriptions longer than 300 characters
    are cut to 300 and suffixed with ``...``.
    """
    results = []
    for result in video_results:
        kind = result['kind']
        if not kind.startswith('youtube'):
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning('Unknown video kind in findx: %s', kind)
            continue

        snippet = result['snippet']
        description = snippet['description']
        if len(description) > 300:
            description = description[:300] + '...'

        results.append({
            'url': base_youtube_url + result['id'],
            'title': snippet['title'],
            'content': description,
            'thumbnail': _extract_url(snippet['thumbnails']['default']['url']),
            'publishedDate': parser.parse(snippet['publishedAt']),
            'embedded': embedded_url.format(videoid=result['id']),
            'template': 'videos.html',
        })
    return results


def _extract_url(url):
    """Return the http(s) URL embedded in ``url`` after a ``/``.

    The match starts at the first ``/http://`` or ``/https://`` and runs up
    to, but not including, the next ``)`` (or the end of the string). The
    leading slash is dropped. Returns ``''`` when nothing matches.
    """
    matching = re.search(r'(/https?://[^)]+)', url)
    if matching:
        return matching.group(1)[1:]
    return ''