summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
author asciimoo <asciimoo@gmail.com> 2013-10-15 19:11:43 +0200
committer asciimoo <asciimoo@gmail.com> 2013-10-15 19:11:43 +0200
commit d793c2733c7aac3aacf40f3f5cf9fc0919305e76 (patch)
tree 0a6f500a17f14d9da4921ad8fbeaf63f1ebccac1 /searx/engines
parent c3b7ed868783691d5678977779e91568cc2f2fec (diff)
[enh] engine types
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/__init__.py 14
-rw-r--r-- searx/engines/duckduckgo.py 19
-rw-r--r-- searx/engines/duckduckgo_definitions.py 12
3 files changed, 26 insertions, 19 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index ced673bde..86fa50d2a 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -6,7 +6,7 @@ import grequests
engine_dir = dirname(realpath(__file__))
-engines = []
+engines = {}
for filename in listdir(engine_dir):
modname = splitext(filename)[0]
@@ -16,14 +16,16 @@ for filename in listdir(engine_dir):
engine = load_source(modname, filepath)
if not hasattr(engine, 'request') or not hasattr(engine, 'response'):
continue
- engines.append(engine)
+ engines[modname] = engine
def default_request_params():
return {'method': 'GET', 'headers': {}, 'data': {}, 'url': ''}
-def make_callback(results, callback):
+def make_callback(engine_name, results, callback):
def process_callback(response, **kwargs):
- results.extend(callback(response))
+ for result in callback(response):
+ result['engine'] = engine_name
+ results.append(result)
return process_callback
def search(query, request):
@@ -31,11 +33,11 @@ def search(query, request):
requests = []
results = []
user_agent = request.headers.get('User-Agent', '')
- for engine in engines:
+ for ename, engine in engines.items():
headers = default_request_params()
headers['User-Agent'] = user_agent
request_params = engine.request(query, headers)
- callback = make_callback(results, engine.response)
+ callback = make_callback(ename, results, engine.response)
if request_params['method'] == 'GET':
req = grequests.get(request_params['url']
,headers=headers
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index ed93829ba..74c17a31b 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -1,14 +1,19 @@
-from lxml import html
+from json import loads
def request(query, params):
- params['method'] = 'POST'
- params['url'] = 'https://duckduckgo.com/html'
- params['data']['q'] = query
+ params['url'] = 'https://duckduckgo.com/d.js?q=%s&l=us-en&p=1&s=0' % query
return params
def response(resp):
- dom = html.fromstring(resp.text)
- results = dom.xpath('//div[@class="results_links results_links_deep web-result"]')
- return [html.tostring(x) for x in results]
+ results = []
+ search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
+ for r in search_res:
+ if not r.get('t'):
+ continue
+ results.append({'title': r['t']
+ ,'content': r['a']
+ ,'url': r['u']
+ })
+ return results
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index de694e02a..531b53ccc 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -1,5 +1,4 @@
import json
-from searx import base_result_template
def request(query, params):
params['url'] = 'http://api.duckduckgo.com/?q=%s&format=json&pretty=0' % query
@@ -10,10 +9,11 @@ def response(resp):
search_res = json.loads(resp.text)
results = []
if 'Definition' in search_res:
- res = {'title' : search_res.get('Heading', '')
- ,'content' : search_res.get('Definition', '')
- ,'url' : search_res.get('AbstractURL', '')
- }
- results.append(base_result_template.format(**res))
+ if search_res.get('AbstractURL'):
+ res = {'title' : search_res.get('Heading', '')
+ ,'content' : search_res.get('Definition', '')
+ ,'url' : search_res.get('AbstractURL', '')
+ }
+ results.append(res)
return results