From cf09b500f35fd1bca3fc9cc853bd7ea932220e4e Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Sun, 3 Apr 2016 22:03:41 +0200 Subject: Add support for dokuwiki engine --- searx/engines/doku.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 searx/engines/doku.py (limited to 'searx') diff --git a/searx/engines/doku.py b/searx/engines/doku.py new file mode 100644 index 000000000..18abe75e5 --- /dev/null +++ b/searx/engines/doku.py @@ -0,0 +1,83 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'http://localhost:8090' +search_url = '/?do=search'\ + '&id={query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'query': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): + try: + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + + # append result + results.append({'title': title, + 'content': "", + 'url': base_url + res_url}) + + # Search results + for r in doc.xpath('//dl[@class="search_results"]/*'): + try: + if r.tag == "dt": + res_url = 
r.xpath('.//a[@class="wikilink1"]/@href')[-1] + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + elif r.tag == "dd": + content = extract_text(r.xpath('.')) + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url}) + except: + continue + + if not res_url: + continue + + # return results + return results -- cgit v1.2.3 From f2d1a530fb8126f66967edc24132eac13dae394d Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Sun, 3 Apr 2016 22:05:03 +0200 Subject: Add ubuntu-fr wiki with new doku engine --- searx/settings.yml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'searx') diff --git a/searx/settings.yml b/searx/settings.yml index 462a0bcc2..439910d92 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -337,6 +337,11 @@ engines: # number_of_results : 5 # timeout : 3.0 + - name : ubuntuwiki + engine : doku + shortcut : uw + base_url : 'http://doc.ubuntu-fr.org' + locales: en : English bg : Български (Bulgarian) -- cgit v1.2.3 From f26f0dab2e4e6a6f77ea9f04f36fe2eb2d6893df Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Mon, 4 Apr 2016 13:38:22 +0200 Subject: Fix pep8 E302 Cf. 
http://legacy.python.org/dev/peps/pep-0008/#blank-lines --- searx/engines/doku.py | 1 + 1 file changed, 1 insertion(+) (limited to 'searx') diff --git a/searx/engines/doku.py b/searx/engines/doku.py index 18abe75e5..233fd2233 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -27,6 +27,7 @@ search_url = '/?do=search'\ # TODO '&startRecord={offset}'\ # TODO '&maximumRecords={limit}'\ + # do search-request def request(query, params): -- cgit v1.2.3 From 2733a92383f7f8127cdf4871c8091b0489ba7356 Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Tue, 5 Apr 2016 13:31:49 +0200 Subject: Fix query encoding --- searx/engines/doku.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'searx') diff --git a/searx/engines/doku.py b/searx/engines/doku.py index 233fd2233..93867fd0d 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -23,7 +23,7 @@ number_of_results = 5 # Doku is OpenSearch compatible base_url = 'http://localhost:8090' search_url = '/?do=search'\ - '&id={query}' + '&{query}' # TODO '&startRecord={offset}'\ # TODO '&maximumRecords={limit}'\ @@ -32,7 +32,7 @@ search_url = '/?do=search'\ def request(query, params): params['url'] = base_url +\ - search_url.format(query=urlencode({'query': query})) + search_url.format(query=urlencode({'id': query})) return params -- cgit v1.2.3 From 51cb832601499dedb38285d09c9db222a2bcab1d Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Sat, 9 Apr 2016 22:21:25 +0200 Subject: Comment out ubuntu-fr as it is not a general search engine --- searx/settings.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'searx') diff --git a/searx/settings.yml b/searx/settings.yml index 439910d92..40f569e9f 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -337,10 +337,12 @@ engines: # number_of_results : 5 # timeout : 3.0 - - name : ubuntuwiki - engine : doku - shortcut : uw - base_url : 'http://doc.ubuntu-fr.org' +# Doku engine lets you access to any 
Doku wiki instance: +# A public one or a private/corporate one. +# - name : ubuntuwiki +# engine : doku +# shortcut : uw +# base_url : 'http://doc.ubuntu-fr.org' locales: en : English -- cgit v1.2.3