From b51ba32f619e6b7a927444475b0ee986d4d13a60 Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Tue, 29 Dec 2015 20:59:51 -0600
Subject: Wolfram Alpha (no API needed now)

---
 searx/engines/wolframalpha_noapi.py | 66 +++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 searx/engines/wolframalpha_noapi.py

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
new file mode 100644
index 000000000..1ce2aa1ff
--- /dev/null
+++ b/searx/engines/wolframalpha_noapi.py
@@ -0,0 +1,66 @@
+# WolframAlpha (Maths)
+#
+# @website     http://www.wolframalpha.com/
+#
+# @using-api   no
+# @results     HTML, JS
+# @stable      no
+# @parse       answer
+
+import re
+import json
+from urllib import urlencode
+from lxml import html
+from searx.engines.xpath import extract_text
+
+# search-url
+url = 'http://www.wolframalpha.com/'
+search_url = url+'input/?{query}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'i': query}))
+
+    return params
+
+
+# tries to find answer under the pattern given
+def extract_answer(script_list, pattern):
+    answer = None
+
+    # get line that matches the pattern
+    for script in script_list:
+        try:
+            line = re.search(pattern, script.text_content()).group(1)
+        except AttributeError:
+            continue
+
+        # extract answer from json
+        answer = line[line.find('{') : line.rfind('}')+1]
+        answer = json.loads(answer.encode('unicode-escape'))
+        answer = answer['stringified'].decode('unicode-escape')
+
+    return answer
+
+
+# get response from search-request
+def response(resp):
+
+    dom = html.fromstring(resp.text)
+
+    # the answer is inside a js script
+    scripts = dom.xpath('//script')
+
+    results = []
+
+    # answer can be located in different 'pods', although by default it should be in pod_0200
+    answer = extract_answer(scripts, 'pod_0200\.push(.*)\n')
+    if not answer:
+        answer = extract_answer(scripts, 'pod_0100\.push(.*)\n')
+        if answer:
+            results.append({'answer': answer})
+    else:
+        results.append({'answer': answer})
+    
+    return results
-- 
cgit v1.2.3


From d827fc49a11b6f84bba3d006b54a70a6a05757fd Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Tue, 29 Dec 2015 21:11:49 -0600
Subject: Remove unnecessary code in wolframalpha_noapi engine

The answer is scraped from a JavaScript function, so parsing the HTML
tree doesn't achieve anything here.
---
 searx/engines/wolframalpha_noapi.py | 49 ++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 31 deletions(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 1ce2aa1ff..29600ca1f 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -10,8 +10,6 @@
 import re
 import json
 from urllib import urlencode
-from lxml import html
-from searx.engines.xpath import extract_text
 
 # search-url
 url = 'http://www.wolframalpha.com/'
@@ -25,42 +23,31 @@ def request(query, params):
     return params
 
 
-# tries to find answer under the pattern given
-def extract_answer(script_list, pattern):
-    answer = None
+# get response from search-request
+def response(resp):
+    results = []
+    
+    # the answer is inside a js function
+    # answer can be located in different 'pods', although by default it should be in pod_0200
+    possible_locations = ['pod_0200\.push(.*)\n',
+                          'pod_0100\.push(.*)\n']
 
     # get line that matches the pattern
-    for script in script_list:
+    for pattern in possible_locations:
         try:
-            line = re.search(pattern, script.text_content()).group(1)
+            line = re.search(pattern, resp.text).group(1)
+            break
         except AttributeError:
             continue
 
-        # extract answer from json
-        answer = line[line.find('{') : line.rfind('}')+1]
-        answer = json.loads(answer.encode('unicode-escape'))
-        answer = answer['stringified'].decode('unicode-escape')
-
-    return answer
-
+    if not line:
+        return results
 
-# get response from search-request
-def response(resp):
-
-    dom = html.fromstring(resp.text)
-
-    # the answer is inside a js script
-    scripts = dom.xpath('//script')
+    # extract answer from json
+    answer = line[line.find('{') : line.rfind('}')+1]
+    answer = json.loads(answer.encode('unicode-escape'))
+    answer = answer['stringified'].decode('unicode-escape')
 
-    results = []
-
-    # answer can be located in different 'pods', although by default it should be in pod_0200
-    answer = extract_answer(scripts, 'pod_0200\.push(.*)\n')
-    if not answer:
-        answer = extract_answer(scripts, 'pod_0100\.push(.*)\n')
-        if answer:
-            results.append({'answer': answer})
-    else:
-        results.append({'answer': answer})
+    results.append({'answer': answer})
     
     return results
-- 
cgit v1.2.3


From 5ed8f4da80ecd119173d7db871256be8484a9ecb Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Tue, 29 Dec 2015 21:37:48 -0600
Subject: Make wolframalpha_noapi.py flake8 compliant

---
 searx/engines/wolframalpha_noapi.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 29600ca1f..23e912a1e 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -3,7 +3,7 @@
 # @website     http://www.wolframalpha.com/
 #
 # @using-api   no
-# @results     HTML, JS
+# @results     HTML
 # @stable      no
 # @parse       answer
 
@@ -26,7 +26,7 @@ def request(query, params):
 # get response from search-request
 def response(resp):
     results = []
-    
+
     # the answer is inside a js function
     # answer can be located in different 'pods', although by default it should be in pod_0200
     possible_locations = ['pod_0200\.push(.*)\n',
@@ -44,10 +44,10 @@ def response(resp):
         return results
 
     # extract answer from json
-    answer = line[line.find('{') : line.rfind('}')+1]
+    answer = line[line.find('{'):line.rfind('}')+1]
     answer = json.loads(answer.encode('unicode-escape'))
     answer = answer['stringified'].decode('unicode-escape')
 
     results.append({'answer': answer})
-    
+
     return results
-- 
cgit v1.2.3


From be54e5269a982e272e2fe8a5064ed898373c9063 Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Wed, 30 Dec 2015 00:53:15 -0600
Subject: Add tests for the Wolfram Alpha engines (both API and NO API
 versions)

---
 searx/engines/wolframalpha_noapi.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 23e912a1e..9d3afe658 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -7,8 +7,8 @@
 # @stable      no
 # @parse       answer
 
-import re
-import json
+from re import search
+from json import loads
 from urllib import urlencode
 
 # search-url
@@ -26,6 +26,8 @@ def request(query, params):
 # get response from search-request
 def response(resp):
     results = []
+    webpage = resp.text
+    line = None
 
     # the answer is inside a js function
     # answer can be located in different 'pods', although by default it should be in pod_0200
@@ -35,7 +37,7 @@ def response(resp):
     # get line that matches the pattern
     for pattern in possible_locations:
         try:
-            line = re.search(pattern, resp.text).group(1)
+            line = search(pattern, webpage).group(1)
             break
         except AttributeError:
             continue
@@ -45,7 +47,7 @@ def response(resp):
 
     # extract answer from json
     answer = line[line.find('{'):line.rfind('}')+1]
-    answer = json.loads(answer.encode('unicode-escape'))
+    answer = loads(answer.encode('unicode-escape'))
     answer = answer['stringified'].decode('unicode-escape')
 
     results.append({'answer': answer})
-- 
cgit v1.2.3


From 0871c7ca85cd19a2fa0971c7db28516a74255d5d Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Fri, 1 Jan 2016 22:02:10 -0600
Subject: [enh] wolframalpha_noapi also appends a link result

---
 searx/engines/wolframalpha_noapi.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 9d3afe658..89a3c45b5 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -1,6 +1,7 @@
 # WolframAlpha (Maths)
 #
 # @website     http://www.wolframalpha.com/
+# @provide-api yes (http://api.wolframalpha.com/v2/)
 #
 # @using-api   no
 # @results     HTML
@@ -14,12 +15,17 @@ from urllib import urlencode
 # search-url
 url = 'http://www.wolframalpha.com/'
 search_url = url+'input/?{query}'
+search_query = ''
 
 
 # do search-request
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'i': query}))
 
+    # used in response
+    global search_query
+    search_query = query
+
     return params
 
 
@@ -42,14 +48,20 @@ def response(resp):
         except AttributeError:
             continue
 
-    if not line:
-        return results
+    if line:
+        # extract answer from json
+        answer = line[line.find('{'):line.rfind('}')+1]
+        answer = loads(answer.encode('unicode-escape'))
+        answer = answer['stringified'].decode('unicode-escape')
+        
+        results.append({'answer': answer})
 
-    # extract answer from json
-    answer = line[line.find('{'):line.rfind('}')+1]
-    answer = loads(answer.encode('unicode-escape'))
-    answer = answer['stringified'].decode('unicode-escape')
+    # failed result
+    elif search('pfail', webpage):
+        return results
 
-    results.append({'answer': answer})
+    # append result
+    results.append({'url': request(search_query, {})['url'],
+                    'title': search_query + ' - Wolfram|Alpha'})
 
     return results
-- 
cgit v1.2.3


From e9d35c1309f05a0b214fb323049909ee7ec62ab8 Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Sat, 2 Jan 2016 00:41:14 -0600
Subject: update tests for wolframalpha

---
 searx/engines/wolframalpha_noapi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 89a3c45b5..d7442db5d 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -53,7 +53,7 @@ def response(resp):
         answer = line[line.find('{'):line.rfind('}')+1]
         answer = loads(answer.encode('unicode-escape'))
         answer = answer['stringified'].decode('unicode-escape')
-        
+
         results.append({'answer': answer})
 
     # failed result
-- 
cgit v1.2.3


From 19d025f0e7ef9a5f41b81fc6c1a9a7114bdae78c Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Sat, 2 Jan 2016 01:49:32 -0600
Subject: [fix] pass wolframalpha_noapi tests

---
 searx/engines/wolframalpha_noapi.py | 43 +++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 16 deletions(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index d7442db5d..a730ed60b 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -8,60 +8,71 @@
 # @stable      no
 # @parse       answer
 
-from re import search
+from re import search, sub
 from json import loads
 from urllib import urlencode
+from lxml import html
 
 # search-url
 url = 'http://www.wolframalpha.com/'
 search_url = url+'input/?{query}'
-search_query = ''
+
+# xpath variables
+scripts_xpath = '//script'
+title_xpath = '//title'
+failure_xpath = '//p[attribute::class="pfail"]'
 
 
 # do search-request
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'i': query}))
 
-    # used in response
-    global search_query
-    search_query = query
-
     return params
 
 
 # get response from search-request
 def response(resp):
     results = []
-    webpage = resp.text
     line = None
 
+    dom = html.fromstring(resp.text)
+    scripts = dom.xpath(scripts_xpath)
+
     # the answer is inside a js function
     # answer can be located in different 'pods', although by default it should be in pod_0200
     possible_locations = ['pod_0200\.push(.*)\n',
                           'pod_0100\.push(.*)\n']
 
+    # failed result
+    if dom.xpath(failure_xpath):
+        return results
+
     # get line that matches the pattern
     for pattern in possible_locations:
-        try:
-            line = search(pattern, webpage).group(1)
+        for script in scripts:
+            try:
+                line = search(pattern, script.text_content()).group(1)
+                break
+            except AttributeError:
+                continue
+        if line:
             break
-        except AttributeError:
-            continue
 
     if line:
         # extract answer from json
         answer = line[line.find('{'):line.rfind('}')+1]
         answer = loads(answer.encode('unicode-escape'))
         answer = answer['stringified'].decode('unicode-escape')
+        answer = sub(r'\\', '', answer)
 
         results.append({'answer': answer})
 
-    # failed result
-    elif search('pfail', webpage):
-        return results
+    # user input is in first part of title
+    title = dom.xpath(title_xpath)[0].text
+    result_url = request(title[:-16], {})['url']
 
     # append result
-    results.append({'url': request(search_query, {})['url'],
-                    'title': search_query + ' - Wolfram|Alpha'})
+    results.append({'url': result_url,
+                    'title': title})
 
     return results
-- 
cgit v1.2.3


From 576d37f256649b570a9c8591a795acd85ac499bc Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Sun, 3 Jan 2016 15:58:01 -0600
Subject: [fix] unescape HTML entities in wolframalpha_noapi's answer

---
 searx/engines/wolframalpha_noapi.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index a730ed60b..0f0315630 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -12,6 +12,7 @@ from re import search, sub
 from json import loads
 from urllib import urlencode
 from lxml import html
+import HTMLParser
 
 # search-url
 url = 'http://www.wolframalpha.com/'
@@ -62,7 +63,11 @@ def response(resp):
         # extract answer from json
         answer = line[line.find('{'):line.rfind('}')+1]
         answer = loads(answer.encode('unicode-escape'))
-        answer = answer['stringified'].decode('unicode-escape')
+        answer = answer['stringified']
+
+        # clean plaintext answer
+        h = HTMLParser.HTMLParser()
+        answer = h.unescape(answer.decode('unicode-escape'))
         answer = sub(r'\\', '', answer)
 
         results.append({'answer': answer})
-- 
cgit v1.2.3


From d997265e5599333b4316561ca18a8f4131e3e2d9 Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Sun, 3 Jan 2016 19:57:37 -0600
Subject: add tests for unicode strings in wolframalpha

---
 searx/engines/wolframalpha_noapi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 0f0315630..71ad3b281 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -73,11 +73,11 @@ def response(resp):
         results.append({'answer': answer})
 
     # user input is in first part of title
-    title = dom.xpath(title_xpath)[0].text
+    title = dom.xpath(title_xpath)[0].text.encode('utf-8')
     result_url = request(title[:-16], {})['url']
 
     # append result
     results.append({'url': result_url,
-                    'title': title})
+                    'title': title.decode('utf-8')})
 
     return results
-- 
cgit v1.2.3


From 2a15944b58089d84a930f36b42c6ef60d4e629b3 Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Sun, 3 Jan 2016 22:03:33 -0600
Subject: [fix] test in wolframalpha_noapi

---
 searx/engines/wolframalpha_noapi.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 71ad3b281..442e894b5 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -41,8 +41,8 @@ def response(resp):
 
     # the answer is inside a js function
     # answer can be located in different 'pods', although by default it should be in pod_0200
-    possible_locations = ['pod_0200\.push(.*)\n',
-                          'pod_0100\.push(.*)\n']
+    possible_locations = ['pod_0200\.push\((.*)',
+                          'pod_0100\.push\((.*)']
 
     # failed result
     if dom.xpath(failure_xpath):
@@ -62,7 +62,10 @@ def response(resp):
     if line:
         # extract answer from json
         answer = line[line.find('{'):line.rfind('}')+1]
-        answer = loads(answer.encode('unicode-escape'))
+        try:
+            answer = loads(answer)
+        except Exception:
+            answer = loads(answer.encode('unicode-escape'))
         answer = answer['stringified']
 
         # clean plaintext answer
-- 
cgit v1.2.3


From 30bfbf2e07def8911d0b293e8032699812f43599 Mon Sep 17 00:00:00 2001
From: a01200356 <a01200356@itesm.mx>
Date: Mon, 18 Jan 2016 11:34:38 -0600
Subject: [fix] pep8

---
 searx/engines/wolframalpha_noapi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'searx/engines/wolframalpha_noapi.py')

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 442e894b5..291fee04d 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -16,7 +16,7 @@ import HTMLParser
 
 # search-url
 url = 'http://www.wolframalpha.com/'
-search_url = url+'input/?{query}'
+search_url = url + 'input/?{query}'
 
 # xpath variables
 scripts_xpath = '//script'
@@ -61,7 +61,7 @@ def response(resp):
 
     if line:
         # extract answer from json
-        answer = line[line.find('{'):line.rfind('}')+1]
+        answer = line[line.find('{'):line.rfind('}') + 1]
         try:
             answer = loads(answer)
         except Exception:
-- 
cgit v1.2.3