diff options
| author | Pydo <pydo@tutanota.com> | 2016-10-01 10:46:18 -0400 |
|---|---|---|
| committer | Pydo <pydo@tutanota.com> | 2016-10-01 10:46:18 -0400 |
| commit | 55a5b686ed6dc0b9a6bfc45e0eaf1f70e24f2aea (patch) | |
| tree | 96e953057dd3fc29681039f7ac5b282dac189ee8 /searx/engines/dictzone.py | |
| parent | 6f87bf2a1c76f1b94ad2119df7fb938c2307e370 (diff) | |
| parent | 295fc9ce96d8cca9c6c4776a00e5fb0942eb6f4d (diff) | |
Merge branch 'master' of https://github.com/asciimoo/searx into feature/seedpeer-engine-integration
Resolved conflict searx/settings.yml
Diffstat (limited to 'searx/engines/dictzone.py')
| -rw-r--r-- | searx/engines/dictzone.py | 69 |
1 file changed, 69 insertions, 0 deletions
"""
 Dictzone

 @website     https://dictzone.com/
 @provide-api no
 @using-api   no
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content
"""

import re
from html import escape          # replaces cgi.escape (removed in Python 3.8)
from urllib.parse import urljoin  # replaces the Python-2-only urlparse module

from lxml import html

from searx.utils import is_valid_lang

categories = ['general']
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100

# Query grammar: "<from_lang>-<to_lang> <word>", e.g. "en-hu dog".
# Language codes are letters only; the word is the last space-free token.
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'


def request(query, params):
    """Fill in params['url'] for a dictzone lookup.

    query  -- raw user query (bytes or str); must match parser_re and
              name two languages that is_valid_lang() recognises.
    params -- engine request dict; returned unchanged when the query
              does not parse, so the engine is simply skipped.
    """
    # Py2 code did unicode(query, 'utf8'); accept bytes or str here.
    if isinstance(query, bytes):
        query = query.decode('utf-8')

    m = parser_re.match(query)
    if not m:
        return params

    from_lang, to_lang, query = m.groups()

    # is_valid_lang returns a tuple whose [2] element is the language
    # name used in dictzone URLs, or a falsy value if unknown.
    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    params['url'] = url.format(from_lang=from_lang[2],
                               to_lang=to_lang[2],
                               query=query)

    return params


def response(resp):
    """Parse the dictzone result table into searx result dicts.

    Each data row of table#r has two cells: the source term and a cell
    of <p><a> translations.  The first row (header) is skipped.
    """
    results = []

    dom = html.fromstring(resp.text)

    for k, result in enumerate(dom.xpath(results_xpath)[1:]):
        try:
            from_result, to_results_raw = result.xpath('./td')
        except ValueError:
            # Row without exactly two <td> cells (e.g. a separator row):
            # not a translation entry, skip it.
            continue

        to_results = []
        for to_result in to_results_raw.xpath('./p/a'):
            t = to_result.text_content()
            if t.strip():
                to_results.append(t)

        results.append({
            # Fragment-style per-row anchor so each result URL is unique.
            'url': urljoin(resp.url, '?%d' % k),
            # quote=False matches the old cgi.escape: only &, <, > escaped.
            'title': escape(from_result.text_content(), quote=False),
            'content': escape('; '.join(to_results), quote=False)
        })

    return results