diff options
| -rw-r--r-- | searx/engines/wolframalpha_api.py | 23 | ||||
| -rw-r--r-- | searx/engines/wolframalpha_noapi.py | 83 | ||||
| -rw-r--r-- | searx/settings.yml | 16 | ||||
| -rw-r--r-- | searx/tests/engines/test_wolframalpha_api.py | 256 | ||||
| -rw-r--r-- | searx/tests/engines/test_wolframalpha_noapi.py | 255 | ||||
| -rw-r--r-- | searx/tests/test_engines.py | 2 |
6 files changed, 619 insertions, 16 deletions
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index d61d25747..540d81351 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -14,7 +14,11 @@ from lxml import etree # search-url base_url = 'http://api.wolframalpha.com/v2/query' search_url = base_url + '?appid={api_key}&{query}&format=plaintext' -api_key = '' +api_key = '' # defined in settings.yml + +# xpath variables +failure_xpath = '/queryresult[attribute::success="false"]' +answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' # do search-request @@ -45,16 +49,17 @@ def response(resp): search_results = etree.XML(resp.content) # return empty array if there are no results - if search_results.xpath('/queryresult[attribute::success="false"]'): + if search_results.xpath(failure_xpath): return [] - # parse result - result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text - result = replace_pua_chars(result) + # parse answers + answers = search_results.xpath(answer_xpath) + if answers: + for answer in answers: + answer = replace_pua_chars(answer.text) + + results.append({'answer': answer}) - # append result - # TODO: shouldn't it bind the source too? - results.append({'answer': result}) + # TODO: append a result with title and link, like in the no api version - # return results return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py new file mode 100644 index 000000000..0f0315630 --- /dev/null +++ b/searx/engines/wolframalpha_noapi.py @@ -0,0 +1,83 @@ +# WolframAlpha (Maths) +# +# @website http://www.wolframalpha.com/ +# @provide-api yes (http://api.wolframalpha.com/v2/) +# +# @using-api no +# @results HTML +# @stable no +# @parse answer + +from re import search, sub +from json import loads +from urllib import urlencode +from lxml import html +import HTMLParser + +# search-url +url = 'http://www.wolframalpha.com/' +search_url = url+'input/?{query}' + +# xpath variables +scripts_xpath = '//script' +title_xpath = '//title' +failure_xpath = '//p[attribute::class="pfail"]' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'i': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + line = None + + dom = html.fromstring(resp.text) + scripts = dom.xpath(scripts_xpath) + + # the answer is inside a js function + # answer can be located in different 'pods', although by default it should be in pod_0200 + possible_locations = ['pod_0200\.push(.*)\n', + 'pod_0100\.push(.*)\n'] + + # failed result + if dom.xpath(failure_xpath): + return results + + # get line that matches the pattern + for pattern in possible_locations: + for script in scripts: + try: + line = search(pattern, script.text_content()).group(1) + break + except AttributeError: + continue + if line: + break + + if line: + # extract answer from json + answer = line[line.find('{'):line.rfind('}')+1] + answer = loads(answer.encode('unicode-escape')) + answer = answer['stringified'] + + # clean plaintext answer + h = HTMLParser.HTMLParser() + answer = h.unescape(answer.decode('unicode-escape')) + answer = sub(r'\\', '', answer) + + results.append({'answer': answer}) + + # user input is in first part of title + title = dom.xpath(title_xpath)[0].text + result_url = request(title[:-16], {})['url'] + + # append result + results.append({'url': result_url, + 'title': title}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index e23e4c390..63e944060 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -300,13 +300,15 @@ engines: engine : vimeo shortcut : vm -# You can use the engine using the official stable API, but you need an API key -# See : http://products.wolframalpha.com/api/ -# - name : wolframalpha -# shortcut : wa -# engine : wolframalpha_api -# api_key: 'apikey' # required! -# timeout: 6.0 + - name : wolframalpha + shortcut : wa + # You can use the engine using the official stable API, but you need an API key + # See : http://products.wolframalpha.com/api/ + # engine : wolframalpha_api + # api_key: '' # required! + engine : wolframalpha_noapi + timeout: 6.0 + disabled : True #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images diff --git a/searx/tests/engines/test_wolframalpha_api.py b/searx/tests/engines/test_wolframalpha_api.py new file mode 100644 index 000000000..d9e23182f --- /dev/null +++ b/searx/tests/engines/test_wolframalpha_api.py @@ -0,0 +1,256 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import wolframalpha_api +from searx.testing import SearxTestCase + + +class TestWolframAlphaAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + api_key = 'XXXXXX-XXXXXXXXXX' + dicto = defaultdict(dict) + dicto['api_key'] = api_key + params = wolframalpha_api.request(query, dicto) + + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('wolframalpha.com', params['url']) + + self.assertIn('api_key', params) + self.assertIn(api_key, params['api_key']) + + def test_response(self): + self.assertRaises(AttributeError, wolframalpha_api.response, None) + self.assertRaises(AttributeError, wolframalpha_api.response, []) + self.assertRaises(AttributeError, wolframalpha_api.response, '') + self.assertRaises(AttributeError, wolframalpha_api.response, '[]') + + xml = '''<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='false' error='false' /> + ''' + + response = mock.Mock(content=xml) + self.assertEqual(wolframalpha_api.response(response), []) + + xml = """<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='false' + error='false' + numpods='0' + datatypes='' + timedout='' + timedoutpods='' + timing='0.241' + parsetiming='0.074' + parsetimedout='false' + recalculate='' + id='' + host='http://www5a.wolframalpha.com' + server='56' + related='' + version='2.6'> + <tips count='1'> + <tip text='Check your spelling, and use English' /> + </tips> + </queryresult> + """ + + response = mock.Mock(content=xml) + self.assertEqual(wolframalpha_api.response(response), []) + + xml = """<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='true' + error='false' + numpods='6' + datatypes='' + timedout='' + timedoutpods='' + timing='0.684' + parsetiming='0.138' + parsetimedout='false' + recalculate='' + id='MSPa416020a7966dachc463600000f9c66cc21444cfg' + host='http://www3.wolframalpha.com' + server='6' + related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?...' + version='2.6'> + <pod title='Input' + scanner='Identity' + id='Input' + position='100' + error='false' + numsubpods='1'> + <subpod title=''> + <plaintext>sqrt(-1)</plaintext> + </subpod> + </pod> + <pod title='Result' + scanner='Simplification' + id='Result' + position='200' + error='false' + numsubpods='1' + primary='true'> + <subpod title=''> + <plaintext></plaintext> + </subpod> + <states count='1'> + <state name='Step-by-step solution' + input='Result__Step-by-step solution' /> + </states> + </pod> + <pod title='Polar coordinates' + scanner='Numeric' + id='PolarCoordinates' + position='300' + error='false' + numsubpods='1'> + <subpod title=''> + <plaintext>r1 (radius), θ90° (angle)</plaintext> + </subpod> + </pod> + <pod title='Position in the complex plane' + scanner='Numeric' + id='PositionInTheComplexPlane' + position='400' + error='false' + numsubpods='1'> + <subpod title=''> + <plaintext></plaintext> + </subpod> + </pod> + <pod title='All 2nd roots of -1' + scanner='RootsOfUnity' + id='' + position='500' + error='false' + numsubpods='2'> + <subpod title=''> + <plaintext> (principal root)</plaintext> + </subpod> + <subpod title=''> + <plaintext>-</plaintext> + </subpod> + </pod> + <pod title='Plot of all roots in the complex plane' + scanner='RootsOfUnity' + id='PlotOfAllRootsInTheComplexPlane' + position='600' + error='false' + numsubpods='1'> + <subpod title=''> + <plaintext></plaintext> + </subpod> + </pod> + </queryresult> + """ + response = mock.Mock(content=xml) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + # self.assertEqual(len(results), 2) + self.assertEqual(len(results), 1) + self.assertIn("i", results[0]['answer']) + # self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title']) + # self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url']) + + xml = """<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='true' + error='false' + numpods='2' + datatypes='' + timedout='' + timedoutpods='' + timing='1.286' + parsetiming='0.255' + parsetimedout='false' + recalculate='' + id='MSPa195222ad740ede5214h30000480ca61h003d3gd6' + host='http://www3.wolframalpha.com' + server='20' + related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?id=...' + version='2.6'> + <pod title='Indefinite integral' + scanner='Integral' + id='IndefiniteIntegral' + position='100' + error='false' + numsubpods='1' + primary='true'> + <subpod title=''> + <plaintext>∫1/xxlog(x)+constant</plaintext> + </subpod> + <states count='1'> + <state name='Step-by-step solution' + input='IndefiniteIntegral__Step-by-step solution' /> + </states> + <infos count='1'> + <info text='log(x) is the natural logarithm'> + <link url='http://reference.wolfram.com/mathematica/ref/Log.html' + text='Documentation' + title='Mathematica' /> + <link url='http://functions.wolfram.com/ElementaryFunctions/Log' + text='Properties' + title='Wolfram Functions Site' /> + <link url='http://mathworld.wolfram.com/NaturalLogarithm.html' + text='Definition' + title='MathWorld' /> + </info> + </infos> + </pod> + <pod title='Plots of the integral' + scanner='Integral' + id='Plot' + position='200' + error='false' + numsubpods='2'> + <subpod title=''> + <plaintext></plaintext> + <states count='1'> + <statelist count='2' + value='Complex-valued plot' + delimiters=''> + <state name='Complex-valued plot' + input='Plot__1_Complex-valued plot' /> + <state name='Real-valued plot' + input='Plot__1_Real-valued plot' /> + </statelist> + </states> + </subpod> + <subpod title=''> + <plaintext></plaintext> + <states count='1'> + <statelist count='2' + value='Complex-valued plot' + delimiters=''> + <state name='Complex-valued plot' + input='Plot__2_Complex-valued plot' /> + <state name='Real-valued plot' + input='Plot__2_Real-valued plot' /> + </statelist> + </states> + </subpod> + </pod> + <assumptions count='1'> + <assumption type='Clash' + word='integral' + template='Assuming "${word}" is ${desc1}. Use as ${desc2} instead' + count='2'> + <value name='IntegralsWord' + desc='an integral' + input='*C.integral-_*IntegralsWord-' /> + <value name='MathematicalFunctionIdentityPropertyClass' + desc='a function property' + input='*C.integral-_*MathematicalFunctionIdentityPropertyClass-' /> + </assumption> + </assumptions> + </queryresult> + """ + response = mock.Mock(content=xml) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + # self.assertEqual(len(results), 2) + self.assertEqual(len(results), 1) + self.assertIn("log(x)+c", results[0]['answer']) + # self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title']) + # self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url']) diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py new file mode 100644 index 000000000..237f578db --- /dev/null +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -0,0 +1,255 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import wolframalpha_noapi +from searx.testing import SearxTestCase + + +class TestWolframAlphaNoAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = wolframalpha_noapi.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('wolframalpha.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, wolframalpha_noapi.response, None) + self.assertRaises(AttributeError, wolframalpha_noapi.response, []) + self.assertRaises(AttributeError, wolframalpha_noapi.response, '') + self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]') + + html = """ + <!DOCTYPE html> + <title> Parangaricutirimícuaro - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <div id="closest"> + <p class="pfail">Wolfram|Alpha doesn't know how to interpret your input.</p> + <div id="dtips"> + <div class="tip"> + <span class="tip-title">Tip: </span> + Check your spelling, and use English + <span class="tip-extra"></span> + </div> + </div> + </div> + </body> + </html> + """ + + response = mock.Mock(text=html) + self.assertEqual(wolframalpha_noapi.response(response), []) + + html = """ + <!DOCTYPE html> + <title> sqrt(-1) - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + try { + document.domain = "wolframalpha.com"; + context = parent ? parent : document; + } catch(e){} + try { + if (typeof(context.$) == "undefined") { + context = window; + } else { + $=context.$; + } + } + catch(e){ context = window;} + + try { + + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } + + context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": "", "popLinks": {} }); + + } catch(e) { } + + try { + + $("#results #pod_0100:not(iframe #pod_0100)") + .add("#showsteps #pod_0100:not(iframe #pod_0100)") + .add(".results-pod #pod_0100:not(iframe #pod_0100)") + .data("tempFileID", 'MSP44501e0dda34g97a0c8900003i71207d6491ab22') + .data("podIdentifier", '\x22Input\x22') + .data("podShortIdentifier", '\x22Input\x22') + .data("buttonStates", '\x22\x22') + .data("scanner", '\x22\x22'); + $("#results #pod_0100-popup:not(iframe #pod_0100-popup)") + .add("#showsteps #pod_0100-popup:not(iframe #pod_0100-popup)") + .add(".results-pod #pod_0100-popup:not(iframe #pod_0100-popup)") + .data("tempFileID", 'MSP44501e0dda34g97a0c8900003i71207d6491ab22') + .data("podIdentifier", '\x22Input\x22') + .data("podShortIdentifier", '\x22Input\x22') + .data("buttonStates", '\x22\x22') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0100_1") + .add("#showsteps #subpod_0100_1:not(iframe #subpod_0100_1)") + .add(".results-pod #subpod_0100_1") + .data("tempFileID", "MSP44511e0dda34g97a0c89000059490h319161eea3") + .data("cellDataTempFile", "MSP44521e0dda34g97a0c89000011378c50d38ede6h") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + + } catch(e){} + + //false + + try { + + if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) { + context.jsonArray.popups.pod_0200 = []; + } + + context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": "", "popLinks": {} }); + + } catch(e) { } + + try { + + $("#results #pod_0200:not(iframe #pod_0200)") + .add("#showsteps #pod_0200:not(iframe #pod_0200)") + .add(".results-pod #pod_0200:not(iframe #pod_0200)") + .data("tempFileID", 'MSP44541e0dda34g97a0c8900004f449i50fa482fd8') + .data("podIdentifier", '\x22Result\x22') + .data("podShortIdentifier", '\x22Result\x22') + .data("buttonStates", '\x22Result\x22\x20\x2D\x3E\x20\x7BAll,\x20None,\x20None,\x20None,\x20None\x7D') + .data("scanner", '\x22\x22'); + $("#results #pod_0200-popup:not(iframe #pod_0200-popup)") + .add("#showsteps #pod_0200-popup:not(iframe #pod_0200-popup)") + .add(".results-pod #pod_0200-popup:not(iframe #pod_0200-popup)") + .data("tempFileID", 'MSP44541e0dda34g97a0c8900004f449i50fa482fd8') + .data("podIdentifier", '\x22Result\x22') + .data("podShortIdentifier", '\x22Result\x22') + .data("buttonStates", '\x22Result\x22\x20\x2D\x3E\x20\x7BAll,\x20None,\x20None\x7D') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0200_1") + .add("#showsteps #subpod_0200_1:not(iframe #subpod_0200_1)") + .add(".results-pod #subpod_0200_1") + .data("tempFileID", "MSP44551e0dda34g97a0c8900003gdgd37faa7272e0") + .data("cellDataTempFile", "MSP44561e0dda34g97a0c89000018ea1iae00104g13") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + } catch(e){} + </script> + </body> + </html> + """ + response = mock.Mock(text=html) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertIn("i", results[0]['answer']) + self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title']) + self.assertIn("http://www.wolframalpha.com/input/?i=+sqrt%28-1%29", results[1]['url']) + + html = """ + <!DOCTYPE html> + <title> integral 1/x - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + //true + try { + document.domain = "wolframalpha.com"; + context = parent ? parent : document; + } catch(e){} + try { + if (typeof(context.$) == "undefined") { + context = window; + } else { + $=context.$; + } + } + catch(e){ context = window;} + + try { + + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } + + context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"}); + + } catch(e) { } + + try { + + $("#results #pod_0100:not(iframe #pod_0100)") + .add("#showsteps #pod_0100:not(iframe #pod_0100)") + .add(".results-pod #pod_0100:not(iframe #pod_0100)") + .data("tempFileID", 'MSP2051if2202e8bg0757100000d119b05egf583d3') + .data("podIdentifier", '\x22IndefiniteIntegral\x22') + .data("podShortIdentifier", '\x22IndefiniteIntegral\x22') + .data("buttonStates", '\x22Indefinite\x20integral\x22\x20\x2D\x3E\x20\x7B\x7D') + .data("scanner", '\x22\x22'); + $("#results #pod_0100-popup:not(iframe #pod_0100-popup)") + .add("#showsteps #pod_0100-popup:not(iframe #pod_0100-popup)") + .add(".results-pod #pod_0100-popup:not(iframe #pod_0100-popup)") + .data("tempFileID", 'MSP2051if2202e8bg0757100000d119b05egf583d3') + .data("podIdentifier", '\x22IndefiniteIntegral\x22') + .data("podShortIdentifier", '\x22IndefiniteIntegral\x22') + .data("buttonStates", '\x22Indefinite\x20integral\x22\x20\x2D\x3E\x20\x7B\x7D') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0100_1") + .add("#showsteps #subpod_0100_1:not(iframe #subpod_0100_1)") + .add(".results-pod #subpod_0100_1") + .data("tempFileID", "MSP2071if2202e8bg0757100004dg60f2a4ca8cf73") + .data("cellDataTempFile", "MSP2081if2202e8bg0757100001h18329f72fe90fg") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + + } catch(e){} + + //false + try { + + $("#results #pod_0200:not(iframe #pod_0200)") + .add("#showsteps #pod_0200:not(iframe #pod_0200)") + .add(".results-pod #pod_0200:not(iframe #pod_0200)") + .data("tempFileID", '') + .data("podIdentifier", '\x22Plot\x22') + .data("podShortIdentifier", '') + .data("buttonStates", '') + .data("scanner", '\x22\x22'); + $("#results #pod_0200-popup:not(iframe #pod_0200-popup)") + .add("#showsteps #pod_0200-popup:not(iframe #pod_0200-popup)") + .add(".results-pod #pod_0200-popup:not(iframe #pod_0200-popup)") + .data("tempFileID", '') + .data("podIdentifier", '\x22Plot\x22') + .data("podShortIdentifier", '') + .data("buttonStates", '') + .data("scanner", '\x22\x22'); + + } catch(e){} + </script> + </body> + </html> + """ + response = mock.Mock(text=html) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertIn("log(x)+c", results[0]['answer']) + self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title']) + self.assertIn("http://www.wolframalpha.com/input/?i=+integral+1%2Fx", results[1]['url']) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 793b77460..f88d53d71 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -36,6 +36,8 @@ from searx.tests.engines.test_subtitleseeker import * # noqa from searx.tests.engines.test_swisscows import * # noqa from searx.tests.engines.test_twitter import * # noqa from searx.tests.engines.test_vimeo import * # noqa +from searx.tests.engines.test_wolframalpha_api import * # noqa +from searx.tests.engines.test_wolframalpha_noapi import * # noqa from searx.tests.engines.test_www1x import * # noqa from searx.tests.engines.test_www500px import * # noqa from searx.tests.engines.test_yacy import * # noqa |