summary | refs | log | tree | commit | diff
path: root/searx/engines/wikipedia.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines/wikipedia.py')
-rw-r--r-- searx/engines/wikipedia.py | 21
1 files changed, 12 insertions, 9 deletions
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index db2fdc000..a216ba886 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -13,6 +13,7 @@
from json import loads
from lxml.html import fromstring
from searx.url_utils import quote, urlencode
+from searx.utils import match_language
# search-url
base_url = u'https://{language}.wikipedia.org/'
@@ -20,7 +21,8 @@ search_url = base_url + u'w/api.php?'\
'action=query'\
'&format=json'\
'&{query}'\
- '&prop=extracts|pageimages'\
+ '&prop=extracts|pageimages|pageprops'\
+ '&ppprop=disambiguation'\
'&exintro'\
'&explaintext'\
'&pithumbsize=300'\
@@ -30,13 +32,10 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
# set language in base_url
def url_lang(lang):
- lang = lang.split('-')[0]
- if lang == 'all' or lang not in supported_languages:
- language = 'en'
- else:
- language = lang
-
- return language
+ lang_pre = lang.split('-')[0]
+ if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
+ return 'en'
+ return match_language(lang, supported_languages, language_aliases).split('-')[0]
# do search-request
@@ -81,12 +80,15 @@ def response(resp):
# wikipedia article's unique id
# first valid id is assumed to be the requested article
+ if 'pages' not in search_result['query']:
+ return results
+
for article_id in search_result['query']['pages']:
page = search_result['query']['pages'][article_id]
if int(article_id) > 0:
break
- if int(article_id) < 0:
+ if int(article_id) < 0 or 'disambiguation' in page.get('pageprops', {}):
return []
title = page.get('title')
@@ -98,6 +100,7 @@ def response(resp):
extract = page.get('extract')
summary = extract_first_paragraph(extract, title, image)
+ summary = summary.replace('() ', '')
# link to wikipedia article
wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \