diff options
| author | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2014-09-02 17:28:35 +0200 |
|---|---|---|
| committer | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2014-09-02 17:28:35 +0200 |
| commit | c5d83059d537d8efb296ffbe743828a884ac4e10 (patch) | |
| tree | 0eb886e2f2f250428a07a1c7bd9fd468bd6f1712 /searx | |
| parent | 80b9312e42087351bb081ceab717e479e75a1ab0 (diff) | |
update generalfile engine and add comments
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/engines/generalfile.py | 31 | ||||
| -rw-r--r-- | searx/settings.yml | 1 |
2 files changed, 28 insertions, 4 deletions
```diff
diff --git a/searx/engines/generalfile.py b/searx/engines/generalfile.py
index d249c00c7..11d8b6955 100644
--- a/searx/engines/generalfile.py
+++ b/searx/engines/generalfile.py
@@ -1,35 +1,60 @@
+## General Files (Files)
+#
+# @website http://www.general-files.org
+# @provide-api no (nothing found)
+#
+# @using-api no (because nothing found)
+# @results HTML (using search portal)
+# @stable no (HTML can change)
+# @parse url, title, content
+#
+# @todo detect torrents?
+
 from lxml import html
 
+# engine dependent config
+categories = ['files']
+paging = True
 
+# search-url
 base_url = 'http://www.general-file.com'
 search_url = base_url + '/files-{letter}/{query}/{pageno}'
 
+# specific xpath variables
 result_xpath = '//table[@class="block-file"]'
 title_xpath = './/h2/a//text()'
 url_xpath = './/h2/a/@href'
 content_xpath = './/p//text()'
 
-paging = True
-
 
+# do search-request
 def request(query, params):
+
     params['url'] = search_url.format(query=query,
                                       letter=query[0],
                                       pageno=params['pageno'])
+
     return params
 
 
+# get response from search-request
 def response(resp):
-
     results = []
+
     dom = html.fromstring(resp.text)
+
+    # parse results
     for result in dom.xpath(result_xpath):
         url = result.xpath(url_xpath)[0]
+
         # skip fast download links
         if not url.startswith('/'):
             continue
+
+        # append result
         results.append({'url': base_url + url,
                         'title': ''.join(result.xpath(title_xpath)),
                         'content': ''.join(result.xpath(content_xpath))})
 
+    # return results
     return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 5a9254070..c6227212e 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -62,7 +62,6 @@ engines:
 
   - name : general-file
     engine : generalfile
-    categories : files
     shortcut : gf
 
   - name : github
```