diff options
| author | pw3t <romain@berthor.fr> | 2014-01-23 22:11:36 +0100 |
|---|---|---|
| committer | pw3t <romain@berthor.fr> | 2014-01-23 22:11:36 +0100 |
| commit | 132681b3aaf5b330d9d19624038b51fe2ebfd8d5 (patch) | |
| tree | 393114f41b487eea4b71dd4073903726310a1257 /searx/engines/filecrop.py | |
| parent | d6b017efb5b51623a02c85690c7335cfc6674092 (diff) | |
| parent | 59eeeaab87951fd6fa3302ec240db98902a20b2c (diff) | |
Merge branch 'master' of https://github.com/asciimoo/searx
Diffstat (limited to 'searx/engines/filecrop.py')
| -rw-r--r-- | searx/engines/filecrop.py | 25 |
1 files changed, 17 insertions, 8 deletions
```diff
diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py
index 52426b84a..81340e601 100644
--- a/searx/engines/filecrop.py
+++ b/searx/engines/filecrop.py
@@ -2,7 +2,8 @@ from urllib import urlencode
 from HTMLParser import HTMLParser
 
 url = 'http://www.filecrop.com/'
-search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'
+search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1' # noqa
+
 
 class FilecropResultParser(HTMLParser):
     def __init__(self):
@@ -18,22 +19,28 @@ class FilecropResultParser(HTMLParser):
     def handle_starttag(self, tag, attrs):
 
         if tag == 'tr':
-            if ('bgcolor', '#edeff5') in attrs or ('bgcolor', '#ffffff') in attrs:
+            if ('bgcolor', '#edeff5') in attrs or\
+               ('bgcolor', '#ffffff') in attrs:
                 self.__start_processing = True
 
         if not self.__start_processing:
             return
 
         if tag == 'label':
-            self.result['title'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
-        elif tag == 'a' and ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs:
+            self.result['title'] = [attr[1] for attr in attrs
+                                    if attr[0] == 'title'][0]
+        elif tag == 'a' and ('rel', 'nofollow') in attrs\
+                and ('class', 'sourcelink') in attrs:
             if 'content' in self.result:
-                self.result['content'] += [attr[1] for attr in attrs if attr[0] == 'title'][0]
+                self.result['content'] += [attr[1] for attr in attrs
+                                           if attr[0] == 'title'][0]
             else:
-                self.result['content'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
+                self.result['content'] = [attr[1] for attr in attrs
+                                          if attr[0] == 'title'][0]
             self.result['content'] += ' '
         elif tag == 'a':
-            self.result['url'] = url + [attr[1] for attr in attrs if attr[0] == 'href'][0]
+            self.result['url'] = url + [attr[1] for attr in attrs
+                                        if attr[0] == 'href'][0]
 
     def handle_endtag(self, tag):
         if self.__start_processing is False:
@@ -60,10 +67,12 @@ class FilecropResultParser(HTMLParser):
 
         self.data_counter += 1
 
+
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'w' :query}))
+    params['url'] = search_url.format(query=urlencode({'w': query}))
     return params
 
+
 def response(resp):
     parser = FilecropResultParser()
     parser.feed(resp.text)
```