# searx/engines/wolframalpha_noapi.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

# WolframAlpha (Maths)
#
# @website     http://www.wolframalpha.com/
# @provide-api yes (http://api.wolframalpha.com/v2/)
#
# @using-api   no
# @results     HTML
# @stable      no
# @parse       answer

from re import search, sub
from json import loads
from urllib import urlencode
from lxml import html
import HTMLParser

# base address of the site and the URL template used for search requests;
# '{query}' is replaced with the url-encoded query string
url = 'http://www.wolframalpha.com/'
search_url = url + 'input/?{query}'

# xpath expressions used to pick apart the result page
scripts_xpath = '//script'
title_xpath = '//title'
failure_xpath = '//p[attribute::class="pfail"]'


# do search-request
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    The query is url-encoded as the ``i`` parameter and substituted into
    ``search_url``. The same ``params`` dict is returned after being updated.
    """
    encoded_query = urlencode({'i': query})
    params['url'] = search_url.format(query=encoded_query)
    return params


# get response from search-request
def response(resp):
    """Parse a WolframAlpha result page into a list of result dicts.

    ``resp`` must expose the page's HTML as ``resp.text``.

    Returns a list that contains, in order:
      * ``{'answer': <text>}`` when an answer could be extracted from one of
        the page's ``<script>`` elements, and
      * ``{'url': <search url>, 'title': <page title>}`` when the page has a
        non-empty ``<title>``.

    An empty list is returned when the page contains the failure marker
    matched by ``failure_xpath``.
    """
    results = []

    dom = html.fromstring(resp.text)

    # failed result
    if dom.xpath(failure_xpath):
        return results

    # the answer is inside a js function
    # answer can be located in different 'pods', although by default it should be in pod_0200
    possible_locations = [r'pod_0200\.push(.*)\n',
                          r'pod_0100\.push(.*)\n']

    scripts = dom.xpath(scripts_xpath)

    # get line that matches the pattern; patterns are tried in priority order
    # and, for each pattern, the first script that matches wins
    line = None
    for pattern in possible_locations:
        for script in scripts:
            match = search(pattern, script.text_content())
            if match:
                line = match.group(1)
                break
        if line:
            break

    if line:
        # extract answer from json: keep only the outermost {...} of the line
        answer = line[line.find('{'):line.rfind('}') + 1]
        # round-trip through 'unicode-escape' so that non-ascii characters
        # pass through the json parser as escape sequences and are restored
        # afterwards
        answer = loads(answer.encode('unicode-escape'))
        answer = answer['stringified']

        # clean plaintext answer: resolve html entities, drop backslashes
        h = HTMLParser.HTMLParser()
        answer = h.unescape(answer.decode('unicode-escape'))
        answer = sub(r'\\', '', answer)

        results.append({'answer': answer})

    # a page without a usable <title> cannot produce the link result; return
    # whatever was collected so far instead of raising
    title_nodes = dom.xpath(title_xpath)
    title = title_nodes[0].text if title_nodes else None
    if not title:
        return results

    # user input is in first part of title
    # NOTE(review): the last 16 characters are presumably a fixed site
    # suffix -- confirm against a live page
    result_url = request(title[:-16], {})['url']

    # append result
    results.append({'url': result_url,
                    'title': title})

    return results