diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2014-12-20 11:58:24 +0100 |
|---|---|---|
| committer | Adam Tauber <asciimoo@gmail.com> | 2014-12-20 11:58:24 +0100 |
| commit | 0b3d632cd0f0fb81db8bc70957ce525ef428c0dd (patch) | |
| tree | 3ae25e796b3d79df2ad2b5097e6854a9b3ee592e /searx/engines/500px.py | |
| parent | 667f4d5cfc4cb6bc0c2e47f230915d35e63bc3d2 (diff) | |
| parent | 56399cf1ea38813b3fdf81122636f420ea421aaf (diff) | |
Merge pull request #158 from Cqoicebordel/Moar-Engines
Add 500px and Searchcode engines
Diffstat (limited to 'searx/engines/500px.py')
| -rw-r--r-- | searx/engines/500px.py | 57 |
1 files changed, 57 insertions, 0 deletions
## 500px (Images)
#
# @website     https://500px.com
# @provide-api yes (https://developers.500px.com/)
#
# @using-api   no
# @results     HTML
# @stable      no (HTML can change)
# @parse       url, title, thumbnail, img_src, content
#
# @todo        rewrite to api


from urllib import urlencode
from urlparse import urljoin
from lxml import html

# engine dependent config
categories = ['images']
paging = True

# search-url
base_url = 'https://500px.com'
# The query string must start right after a single '?'.  A doubled
# "search?" would make the first parameter key "search?page", so the page
# number would never be sent under the key "page".
search_url = base_url+'/search?page={pageno}&type=photos&{query}'


# do search-request
def request(query, params):
    """Build the search URL and store it in ``params['url']``.

    ``query`` is url-encoded as the ``q`` parameter and ``params['pageno']``
    is inserted as the page number.  The same ``params`` dict is returned.
    """
    params['url'] = search_url.format(pageno=params['pageno'],
                                      query=urlencode({'q': query}))

    return params


# get response from search-request
def response(resp):
    """Parse the HTML in ``resp.text`` into a list of image results.

    Every ``<div class="photo">`` element yields one dict with the keys
    ``url``, ``title``, ``img_src``, ``content`` and ``template``.  Elements
    that lack a link, an image source, a title or an info text are skipped,
    so a single incomplete entry does not discard the rest of the page.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath('//div[@class="photo"]'):
        links = result.xpath('.//a')
        if not links:
            continue
        link = links[0]

        images = link.xpath('.//img')
        titles = result.xpath('.//div[@class="title"]//text()')
        contents = result.xpath('.//div[@class="info"]//text()')
        if not (images and titles and contents):
            continue

        img_src = images[0].attrib.get('src')
        if not img_src:
            continue

        # hrefs on the page may be relative, so resolve against base_url
        url = urljoin(base_url, link.attrib.get('href'))

        # append result
        results.append({'url': url,
                        'title': titles[0],
                        'img_src': img_src,
                        'content': contents[0],
                        'template': 'images.html'})

    # return results
    return results