From 09ee2aa69dbd4815e0e1e1de53f3571972e04903 Mon Sep 17 00:00:00 2001 From: marc Date: Wed, 6 Jul 2016 17:29:40 -0500 Subject: [fix] Result text in Wolfram|Alpha (#607) --- searx/engines/wolframalpha_api.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'searx/engines/wolframalpha_api.py') diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 4526c825f..0e38051d1 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -22,6 +22,7 @@ answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext' pods_xpath = '//pod' subpods_xpath = './subpod' +pod_primary_xpath = './@primary' pod_id_xpath = './@id' pod_title_xpath = './@title' plaintext_xpath = './plaintext' @@ -78,10 +79,12 @@ def response(resp): infobox_title = None pods = search_results.xpath(pods_xpath) + result = "" result_chunks = [] for pod in pods: pod_id = pod.xpath(pod_id_xpath)[0] pod_title = pod.xpath(pod_title_xpath)[0] + pod_is_result = pod.xpath(pod_primary_xpath) subpods = pod.xpath(subpods_xpath) if not subpods: @@ -94,6 +97,9 @@ def response(resp): if content and pod_id not in image_pods: + if pod_is_result: + result = content + # if no input pod was found, title is first plaintext pod if not infobox_title: infobox_title = content @@ -116,7 +122,7 @@ def response(resp): # append link to site results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': 'Wolfram|Alpha', - 'content': infobox_title}) + 'title': infobox_title + ' - Wolfram|Alpha', + 'content': result}) return results -- cgit v1.2.3 From a8907224a1c433b1227fd707e9bb2524dd405109 Mon Sep 17 00:00:00 2001 From: "Lorenzo J. Lucchini" Date: Thu, 7 Jul 2016 00:33:03 +0200 Subject: Improving Wolfram Alpha search hit content Making WA search hits contain - the (parsed) input inside the "title" instead of just "Wolfram|Alpha", to better match other hit titles and to confirm correct parsing of input to the user - the first output field that contains any text (skipping ones that are only pictures; this is usually the most meaningful "result" field) instead of the raw input as the "content", making it additionally possible to obtain WA computations from JSON API calls --- searx/engines/wolframalpha_api.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'searx/engines/wolframalpha_api.py') diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 0e38051d1..e743c8f56 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -18,7 +18,6 @@ api_key = '' # defined in settings.yml # xpath variables failure_xpath = '/queryresult[attribute::success="false"]' -answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext' pods_xpath = '//pod' subpods_xpath = './subpod' @@ -76,11 +75,11 @@ def response(resp): try: infobox_title = search_results.xpath(input_xpath)[0].text except: - infobox_title = None + infobox_title = "" pods = search_results.xpath(pods_xpath) - result = "" result_chunks = [] + result_content = "" for pod in pods: pod_id = pod.xpath(pod_id_xpath)[0] pod_title = pod.xpath(pod_title_xpath)[0] @@ -97,8 +96,9 @@ def response(resp): if content and pod_id not in image_pods: - if pod_is_result: - result = content + if pod_is_result or not result_content: + if pod_id != "Input": + result_content = "%s: %s" % (pod_title, content) # if no input pod was found, title is first plaintext pod if not infobox_title: @@ -115,6 +115,8 @@ def response(resp): if not result_chunks: return [] + title = "Wolfram|Alpha (%s)" % infobox_title + # append infobox results.append({'infobox': infobox_title, 'attributes': result_chunks, @@ -122,7 +124,7 @@ def response(resp): # append link to site results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': infobox_title + ' - Wolfram|Alpha', - 'content': result}) + 'title': title, + 'content': result_content}) return results -- cgit v1.2.3