From 1d30141c207e51c142cab3eee97783f08c1cb5c9 Mon Sep 17 00:00:00 2001 From: David A Roberts Date: Sat, 14 Jan 2017 18:40:37 +1000 Subject: [enh] show spelling corrections --- searx/engines/google.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/google.py b/searx/engines/google.py index 2fa638d73..0fdf2d4ae 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -112,6 +112,7 @@ title_xpath = './/h3' content_xpath = './/span[@class="st"]' content_misc_xpath = './/div[@class="f slp"]' suggestion_xpath = '//p[@class="_Bmc"]' +spelling_suggestion_xpath = '//a[@class="spell"]' # map : detail location map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()' @@ -275,6 +276,9 @@ def response(resp): # append suggestion results.append({'suggestion': extract_text(suggestion)}) + for correction in dom.xpath(spelling_suggestion_xpath): + results.append({'correction': extract_text(correction)}) + # return results return results -- cgit v1.2.3 From 7492997c517a447b2163abbd800cfd4b84dcf77d Mon Sep 17 00:00:00 2001 From: David A Roberts Date: Tue, 17 Jan 2017 21:14:33 +1000 Subject: [fix] allow empty content --- searx/engines/xpath.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'searx/engines') diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 18943bba4..0d39b28a8 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -31,8 +31,6 @@ if xpath_results is a string element, then it's already done def extract_text(xpath_results): if type(xpath_results) == list: # it's list of result : concat everything using recursive call - if not xpath_results: - raise Exception('Empty url resultset') result = '' for e in xpath_results: result = result + extract_text(e) @@ -48,6 +46,8 @@ def extract_text(xpath_results): def extract_url(xpath_results, search_url): + if xpath_results == []: + raise Exception('Empty url resultset') url = extract_text(xpath_results) if url.startswith('//'): @@ -103,8 +103,8 @@ def response(resp): if results_xpath: for result in dom.xpath(results_xpath): url = extract_url(result.xpath(url_xpath), search_url) - title = extract_text(result.xpath(title_xpath)[0]) - content = extract_text(result.xpath(content_xpath)[0]) + title = extract_text(result.xpath(title_xpath)) + content = extract_text(result.xpath(content_xpath)) results.append({'url': url, 'title': title, 'content': content}) else: for url, title, content in zip( -- cgit v1.2.3 From 52d108720254368c1ed9f83e7e1b48fec0f7f456 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Fri, 27 Jan 2017 00:18:46 +0100 Subject: [enh] add result number parsing to google engine --- searx/engines/google.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'searx/engines') diff --git a/searx/engines/google.py b/searx/engines/google.py index 0fdf2d4ae..e14e9e702 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -221,6 +221,12 @@ def response(resp): instant_answer = dom.xpath('//div[@id="_vBb"]//text()') if instant_answer: results.append({'answer': u' '.join(instant_answer)}) + try: + results_num = int(dom.xpath('//div[@id="resultStats"]//text()')[0] + .split()[1].replace(',', '')) + results.append({'number_of_results': results_num}) + except: + pass # parse results for result in dom.xpath(results_xpath): -- cgit v1.2.3