summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/apkmirror.py 2
-rw-r--r-- searx/engines/loc.py 68
-rw-r--r-- searx/engines/wikipedia.py 9
3 files changed, 76 insertions, 3 deletions
diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py
index a4c66e891..a9ddd711a 100644
--- a/searx/engines/apkmirror.py
+++ b/searx/engines/apkmirror.py
@@ -45,7 +45,7 @@ def response(resp):
dom = html.fromstring(resp.text)
# parse results
- for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]/div[@class="appRow"]'):
+ for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]//div[@class="appRow"]'):
link = eval_xpath_getindex(result, './/h5/a', 0)
url = base_url + link.attrib.get('href') + '#downloads'
diff --git a/searx/engines/loc.py b/searx/engines/loc.py
new file mode 100644
index 000000000..5c09ceff2
--- /dev/null
+++ b/searx/engines/loc.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+
+ Library of Congress : images from Prints and Photographs Online Catalog
+
+"""
+
+from json import loads
+from urllib.parse import urlencode
+
+
+about = {  # descriptive metadata about the Library of Congress pictures service
+ "website": 'https://www.loc.gov/pictures/',
+ "wikidata_id": 'Q131454',
+ "official_api_documentation": 'https://www.loc.gov/pictures/api',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+
+categories = ['images']
+
+paging = True
+
+base_url = 'https://loc.gov/pictures/search/?'  # request() prepends this to the formatted search_string
+search_string = "&sp={page}&{query}&fo=json"  # {page}: params['pageno'], {query}: urlencoded 'q=...' pair
+
+IMG_SRC_FIXES = {  # full-size URL prefix -> replacement; response() uses the thumbnail if no prefix matches
+ 'https://tile.loc.gov/storage-services/': 'https://tile.loc.gov/storage-services/',  # maps to itself
+ 'https://loc.gov/pictures/static/images/': 'https://tile.loc.gov/storage-services/',
+ 'https://www.loc.gov/pictures/cdn/': 'https://tile.loc.gov/storage-services/',
+}
+
+
+def request(query, params):
+    """Store the Library of Congress search URL in ``params['url']``.
+
+    The query is URL-encoded as ``q`` and ``params['pageno']`` selects the
+    result page; the same ``params`` dict is returned.
+    """
+    encoded_query = urlencode({'q': query})
+    params['url'] = base_url + search_string.format(page=params['pageno'], query=encoded_query)
+    return params
+
+
+def response(resp):
+    """Convert the JSON search response into a list of image results."""
+    results = []
+    for result in loads(resp.text)['results']:
+        img_src = result['image']['full']
+        # Keep the full-size image only for prefixes listed in IMG_SRC_FIXES.
+        for url_prefix, url_replace in IMG_SRC_FIXES.items():
+            if img_src.startswith(url_prefix):
+                # Slice rather than str.replace(): rewrite the prefix only.
+                img_src = url_replace + img_src[len(url_prefix):]
+                break
+        else:
+            # No known prefix matched: fall back to the thumbnail URL.
+            img_src = result['image']['thumb']
+        results.append({
+            'url': result['links']['item'],
+            'title': result['title'],
+            'img_src': img_src,
+            'thumbnail_src': result['image']['thumb'],
+            'author': result['creator'],
+            'template': 'images.html'
+        })
+    return results
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index eff301145..c8e589e64 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -22,6 +22,7 @@ about = {
# search-url
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
+language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")}
# set language in base_url
@@ -37,8 +38,12 @@ def request(query, params):
if query.islower():
query = query.title()
+ language = url_lang(params['language'])
params['url'] = search_url.format(title=quote(query),
- language=url_lang(params['language']))
+ language=language)
+
+ if params['language'].lower() in language_variants.get(language, []):
+ params['headers']['Accept-Language'] = params['language'].lower()
params['headers']['User-Agent'] = searx_useragent()
params['raise_for_httperror'] = False
@@ -60,7 +65,7 @@ def response(resp):
if api_result.get('type') != 'standard':
return []
- title = api_result['title']
+ title = api_result['displaytitle']
wikipedia_link = api_result['content_urls']['desktop']['page']
results.append({'url': wikipedia_link, 'title': title})