summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorAdam Tauber <asciimoo@gmail.com>2018-04-09 09:34:26 +0200
committerGitHub <noreply@github.com>2018-04-09 09:34:26 +0200
commite5def5b0191a36315d300457cdfdf68aebbc9a4c (patch)
tree3a969551bf2f3c899b12dd5a5ea2522f7b605942 /searx
parent283f6c905340087d7511bfcdb815c0b4183bcdda (diff)
parent96877862269f35aefc0b3ca7a7cb8812b1555dc4 (diff)
Merge pull request #1260 from MarcAbonce/engine-fixes
[fix] Engine fixes
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/google_news.py4
-rw-r--r--searx/engines/wikidata.py13
-rw-r--r--searx/engines/xpath.py2
-rw-r--r--searx/settings.yml1
4 files changed, 10 insertions, 10 deletions
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 41abf0a01..aadcb76df 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -68,8 +68,8 @@ def response(resp):
for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
try:
r = {
- 'url': result.xpath('.//a[@class="l _PMs"]')[0].attrib.get("href"),
- 'title': ''.join(result.xpath('.//a[@class="l _PMs"]//text()')),
+ 'url': result.xpath('.//a[@class="l lLrAF"]')[0].attrib.get("href"),
+ 'title': ''.join(result.xpath('.//a[@class="l lLrAF"]//text()')),
'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
}
except:
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 1fdbc9869..fe53609c1 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -27,7 +27,7 @@ result_count = 1
# urls
wikidata_host = 'https://www.wikidata.org'
url_search = wikidata_host \
- + '/wiki/Special:ItemDisambiguation?{query}'
+ + '/w/index.php?{query}'
wikidata_api = wikidata_host + '/w/api.php'
url_detail = wikidata_api\
@@ -40,7 +40,7 @@ url_map = 'https://www.openstreetmap.org/'\
url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
# xpaths
-wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
+wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
title_xpath = '//*[contains(@class,"wikibase-title-label")]'
description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
property_xpath = '//div[@id="{propertyid}"]'
@@ -57,22 +57,21 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
def request(query, params):
- language = match_language(params['language'], supported_languages).split('-')[0]
-
params['url'] = url_search.format(
- query=urlencode({'label': query, 'language': language}))
+ query=urlencode({'search': query}))
return params
def response(resp):
results = []
html = fromstring(resp.text)
- wikidata_ids = html.xpath(wikidata_ids_xpath)
+ search_results = html.xpath(wikidata_ids_xpath)
language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
# TODO: make requests asynchronous to avoid timeout when result_count > 1
- for wikidata_id in wikidata_ids[:result_count]:
+ for search_result in search_results[:result_count]:
+ wikidata_id = search_result.split('/')[-1]
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
htmlresponse = get(url)
jsonresponse = loads(htmlresponse.text)
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index c8c56da44..50f98d935 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -53,7 +53,7 @@ def extract_url(xpath_results, search_url):
if url.startswith('//'):
# add http or https to this kind of url //example.com/
parsed_search_url = urlparse(search_url)
- url = u'{0}:{1}'.format(parsed_search_url.scheme, url)
+ url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
elif url.startswith('/'):
# fix relative url to the search engine
url = urljoin(search_url, url)
diff --git a/searx/settings.yml b/searx/settings.yml
index 70750fc96..d72d01a54 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -174,6 +174,7 @@ engines:
- name : wikidata
engine : wikidata
shortcut : wd
+ timeout : 3.0
weight : 2
- name : duckduckgo