summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/brave.py7
-rw-r--r--searx/engines/deepl.py7
-rw-r--r--searx/engines/dictzone.py101
-rw-r--r--searx/engines/duckduckgo.py8
-rw-r--r--searx/engines/duckduckgo_definitions.py8
-rw-r--r--searx/engines/google.py8
-rw-r--r--searx/engines/libretranslate.py25
-rw-r--r--searx/engines/lingva.py43
-rw-r--r--searx/engines/mozhi.py29
-rw-r--r--searx/engines/openstreetmap.py35
-rw-r--r--searx/engines/tineye.py9
-rw-r--r--searx/engines/translated.py45
-rw-r--r--searx/engines/xpath.py2
13 files changed, 190 insertions, 137 deletions
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
index db1fc7976..584d2d95c 100644
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -139,6 +139,7 @@ from searx.utils import (
get_embeded_stream_url,
)
from searx.enginelib.traits import EngineTraits
+from searx.result_types import Answer
if TYPE_CHECKING:
import logging
@@ -274,10 +275,14 @@ def _parse_search(resp):
result_list = []
dom = html.fromstring(resp.text)
+    # It is doubtful that Brave still provides the "answer" class; answers
+    # have not been observed in Brave results for a long time.
answer_tag = eval_xpath_getindex(dom, '//div[@class="answer"]', 0, default=None)
if answer_tag:
url = eval_xpath_getindex(dom, '//div[@id="featured_snippet"]/a[@class="result-header"]/@href', 0, default=None)
- result_list.append({'answer': extract_text(answer_tag), 'url': url})
+ answer = extract_text(answer_tag)
+ if answer is not None:
+ Answer(results=result_list, answer=answer, url=url)
# xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'
xpath_results = '//div[contains(@class, "snippet ")]'
diff --git a/searx/engines/deepl.py b/searx/engines/deepl.py
index 484f56ec4..eff746b6f 100644
--- a/searx/engines/deepl.py
+++ b/searx/engines/deepl.py
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Deepl translation engine"""
+from searx.result_types import Translations
+
about = {
"website": 'https://deepl.com',
"wikidata_id": 'Q43968444',
@@ -45,8 +47,7 @@ def response(resp):
if not result.get('translations'):
return results
- translations = [{'text': translation['text']} for translation in result['translations']]
-
- results.append({'answer': translations[0]['text'], 'answer_type': 'translations', 'translations': translations})
+ translations = [Translations.Item(text=t['text']) for t in result['translations']]
+ Translations(results=results, translations=translations)
return results
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index acd682911..7f562c716 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -3,8 +3,12 @@
Dictzone
"""
+import urllib.parse
from lxml import html
-from searx.utils import eval_xpath
+
+from searx.utils import eval_xpath, extract_text
+from searx.result_types import Translations
+from searx.network import get as http_get # https://github.com/searxng/searxng/issues/762
# about
about = {
@@ -18,46 +22,83 @@ about = {
engine_type = 'online_dictionary'
categories = ['general', 'translate']
-url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+base_url = "https://dictzone.com"
weight = 100
-
-results_xpath = './/table[@id="r"]/tr'
https_support = True
def request(query, params): # pylint: disable=unused-argument
- params['url'] = url.format(from_lang=params['from_lang'][2], to_lang=params['to_lang'][2], query=params['query'])
+ from_lang = params["from_lang"][2] # "english"
+ to_lang = params["to_lang"][2] # "german"
+ query = params["query"]
+
+ params["url"] = f"{base_url}/{from_lang}-{to_lang}-dictionary/{urllib.parse.quote_plus(query)}"
return params
+def _clean_up_node(node):
+ for x in ["./i", "./span", "./button"]:
+ for n in node.xpath(x):
+ n.getparent().remove(n)
+
+
def response(resp):
+
+ results = []
+ item_list = []
+
+ if not resp.ok:
+ return results
+
dom = html.fromstring(resp.text)
- translations = []
- for result in eval_xpath(dom, results_xpath)[1:]:
- try:
- from_result, to_results_raw = eval_xpath(result, './td')
- except: # pylint: disable=bare-except
+ for result in eval_xpath(dom, ".//table[@id='r']//tr"):
+
+    # each row is a Translations.Item
+
+ td_list = result.xpath("./td")
+ if len(td_list) != 2:
+ # ignore header columns "tr/th"
continue
- to_results = []
- for to_result in eval_xpath(to_results_raw, './p/a'):
- t = to_result.text_content()
- if t.strip():
- to_results.append(to_result.text_content())
-
- translations.append(
- {
- 'text': f"{from_result.text_content()} - {'; '.join(to_results)}",
- }
- )
-
- if translations:
- result = {
- 'answer': translations[0]['text'],
- 'translations': translations,
- 'answer_type': 'translations',
- }
-
- return [result]
+ col_from, col_to = td_list
+ _clean_up_node(col_from)
+
+ text = f"{extract_text(col_from)}"
+
+ synonyms = []
+ p_list = col_to.xpath(".//p")
+
+ for i, p_item in enumerate(p_list):
+
+ smpl: str = extract_text(p_list[i].xpath("./i[@class='smpl']")) # type: ignore
+ _clean_up_node(p_item)
+ p_text: str = extract_text(p_item) # type: ignore
+
+ if smpl:
+ p_text += " // " + smpl
+
+ if i == 0:
+ text += f" : {p_text}"
+ continue
+
+ synonyms.append(p_text)
+
+ item = Translations.Item(text=text, synonyms=synonyms)
+ item_list.append(item)
+
+ # the "autotranslate" of dictzone is loaded by the JS from URL:
+ # https://dictzone.com/trans/hello%20world/en_de
+
+ from_lang = resp.search_params["from_lang"][1] # "en"
+ to_lang = resp.search_params["to_lang"][1] # "de"
+ query = resp.search_params["query"]
+
+ # works only sometimes?
+ autotranslate = http_get(f"{base_url}/trans/{query}/{from_lang}_{to_lang}", timeout=1.0)
+ if autotranslate.ok and autotranslate.text:
+ item_list.insert(0, Translations.Item(text=autotranslate.text))
+
+ Translations(results=results, translations=item_list, url=resp.search_params["url"])
+ return results
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index d6c5be8f4..ff6727959 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -27,6 +27,7 @@ from searx.network import get # see https://github.com/searxng/searxng/issues/7
from searx import redisdb
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineCaptchaException
+from searx.result_types import Answer
if TYPE_CHECKING:
import logging
@@ -398,12 +399,7 @@ def response(resp):
):
current_query = resp.search_params["data"].get("q")
- results.append(
- {
- 'answer': zero_click,
- 'url': "https://duckduckgo.com/?" + urlencode({"q": current_query}),
- }
- )
+ Answer(results=results, answer=zero_click, url="https://duckduckgo.com/?" + urlencode({"q": current_query}))
return results
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 59caed8ce..e1947f4c0 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -21,6 +21,7 @@ from lxml import html
from searx.data import WIKIDATA_UNITS
from searx.utils import extract_text, html_to_text, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
+from searx.result_types import Answer
if TYPE_CHECKING:
import logging
@@ -99,9 +100,10 @@ def response(resp):
# add answer if there is one
answer = search_res.get('Answer', '')
if answer:
- logger.debug('AnswerType="%s" Answer="%s"', search_res.get('AnswerType'), answer)
- if search_res.get('AnswerType') not in ['calc', 'ip']:
- results.append({'answer': html_to_text(answer), 'url': search_res.get('AbstractURL', '')})
+ answer_type = search_res.get('AnswerType')
+ logger.debug('AnswerType="%s" Answer="%s"', answer_type, answer)
+ if isinstance(answer, str) and answer_type not in ['calc', 'ip']:
+ Answer(results=results, answer=html_to_text(answer), url=search_res.get('AbstractURL', ''))
# add infobox
if 'Definition' in search_res:
diff --git a/searx/engines/google.py b/searx/engines/google.py
index e322aa41b..d390e6e98 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -25,6 +25,7 @@ from searx.locales import language_tag, region_tag, get_official_locales
from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx.exceptions import SearxEngineCaptchaException
from searx.enginelib.traits import EngineTraits
+from searx.result_types import Answer
if TYPE_CHECKING:
import logging
@@ -331,12 +332,7 @@ def response(resp):
for item in answer_list:
for bubble in eval_xpath(item, './/div[@class="nnFGuf"]'):
bubble.drop_tree()
- results.append(
- {
- 'answer': extract_text(item),
- 'url': (eval_xpath(item, '../..//a/@href') + [None])[0],
- }
- )
+ Answer(results=results, answer=extract_text(item), url=(eval_xpath(item, '../..//a/@href') + [None])[0])
# parse results
diff --git a/searx/engines/libretranslate.py b/searx/engines/libretranslate.py
index d9b9cf2f9..2e6663cb0 100644
--- a/searx/engines/libretranslate.py
+++ b/searx/engines/libretranslate.py
@@ -2,7 +2,8 @@
"""LibreTranslate (Free and Open Source Machine Translation API)"""
import random
-from json import dumps
+import json
+from searx.result_types import Translations
about = {
"website": 'https://libretranslate.com',
@@ -16,19 +17,27 @@ about = {
engine_type = 'online_dictionary'
categories = ['general', 'translate']
-base_url = "https://translate.terraprint.co"
-api_key = ''
+base_url = "https://libretranslate.com/translate"
+api_key = ""
def request(_query, params):
request_url = random.choice(base_url) if isinstance(base_url, list) else base_url
+
+ if request_url.startswith("https://libretranslate.com") and not api_key:
+ return None
params['url'] = f"{request_url}/translate"
- args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query'], 'alternatives': 3}
+ args = {
+ 'q': params['query'],
+ 'source': params['from_lang'][1],
+ 'target': params['to_lang'][1],
+ 'alternatives': 3,
+ }
if api_key:
args['api_key'] = api_key
- params['data'] = dumps(args)
+ params['data'] = json.dumps(args)
params['method'] = 'POST'
params['headers'] = {'Content-Type': 'application/json'}
params['req_url'] = request_url
@@ -41,12 +50,10 @@ def response(resp):
json_resp = resp.json()
text = json_resp.get('translatedText')
-
if not text:
return results
- translations = [{'text': text}] + [{'text': alternative} for alternative in json_resp.get('alternatives', [])]
-
- results.append({'answer': text, 'answer_type': 'translations', 'translations': translations})
+ item = Translations.Item(text=text, examples=json_resp.get('alternatives', []))
+ Translations(results=results, translations=[item])
return results
diff --git a/searx/engines/lingva.py b/searx/engines/lingva.py
index ecebe4587..1cf70f636 100644
--- a/searx/engines/lingva.py
+++ b/searx/engines/lingva.py
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Lingva (alternative Google Translate frontend)"""
+from searx.result_types import Translations
+
about = {
"website": 'https://lingva.ml',
"wikidata_id": None,
@@ -14,13 +16,10 @@ engine_type = 'online_dictionary'
categories = ['general', 'translate']
url = "https://lingva.thedaviddelta.com"
-search_url = "{url}/api/v1/{from_lang}/{to_lang}/{query}"
def request(_query, params):
- params['url'] = search_url.format(
- url=url, from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query']
- )
+ params['url'] = f"{url}/api/v1/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}"
return params
@@ -45,32 +44,30 @@ def response(resp):
for definition in info['definitions']:
for translation in definition['list']:
data.append(
- {
- 'text': result['translation'],
- 'definitions': [translation['definition']] if translation['definition'] else [],
- 'examples': [translation['example']] if translation['example'] else [],
- 'synonyms': translation['synonyms'],
- }
+ Translations.Item(
+ text=result['translation'],
+ definitions=[translation['definition']] if translation['definition'] else [],
+ examples=[translation['example']] if translation['example'] else [],
+ synonyms=translation['synonyms'],
+ )
)
for translation in info["extraTranslations"]:
for word in translation["list"]:
data.append(
- {
- 'text': word['word'],
- 'definitions': word['meanings'],
- }
+ Translations.Item(
+ text=word['word'],
+ definitions=word['meanings'],
+ )
)
if not data and result['translation']:
- data.append({'text': result['translation']})
-
- results.append(
- {
- 'answer': data[0]['text'],
- 'answer_type': 'translations',
- 'translations': data,
- }
- )
+ data.append(Translations.Item(text=result['translation']))
+ params = resp.search_params
+ Translations(
+ results=results,
+ translations=data,
+ url=f"{url}/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}",
+ )
return results
diff --git a/searx/engines/mozhi.py b/searx/engines/mozhi.py
index a36bfbec8..c337a287c 100644
--- a/searx/engines/mozhi.py
+++ b/searx/engines/mozhi.py
@@ -3,7 +3,9 @@
import random
import re
-from urllib.parse import urlencode
+import urllib.parse
+
+from searx.result_types import Translations
about = {
"website": 'https://codeberg.org/aryak/mozhi',
@@ -27,34 +29,33 @@ def request(_query, params):
request_url = random.choice(base_url) if isinstance(base_url, list) else base_url
args = {'from': params['from_lang'][1], 'to': params['to_lang'][1], 'text': params['query'], 'engine': mozhi_engine}
- params['url'] = f"{request_url}/api/translate?{urlencode(args)}"
+ params['url'] = f"{request_url}/api/translate?{urllib.parse.urlencode(args)}"
return params
def response(resp):
+ results = []
translation = resp.json()
- data = {'text': translation['translated-text'], 'definitions': [], 'examples': []}
+ item = Translations.Item(text=translation['translated-text'])
if translation['target_transliteration'] and not re.match(
re_transliteration_unsupported, translation['target_transliteration']
):
- data['transliteration'] = translation['target_transliteration']
+ item.transliteration = translation['target_transliteration']
if translation['word_choices']:
for word in translation['word_choices']:
if word.get('definition'):
- data['definitions'].append(word['definition'])
+ item.definitions.append(word['definition'])
for example in word.get('examples_target', []):
- data['examples'].append(re.sub(r"<|>", "", example).lstrip('- '))
-
- data['synonyms'] = translation.get('source_synonyms', [])
+ item.examples.append(re.sub(r"<|>", "", example).lstrip('- '))
- result = {
- 'answer': translation['translated-text'],
- 'answer_type': 'translations',
- 'translations': [data],
- }
+ item.synonyms = translation.get('source_synonyms', [])
- return [result]
+ url = urllib.parse.urlparse(resp.search_params["url"])
+ # remove the api path
+ url = url._replace(path="", fragment="").geturl()
+ Translations(results=results, translations=[item], url=url)
+ return results
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 8f3565eda..3b1885522 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -4,16 +4,16 @@
"""
import re
-from json import loads
-from urllib.parse import urlencode
+import urllib.parse
+
from functools import partial
from flask_babel import gettext
from searx.data import OSM_KEYS_TAGS, CURRENCIES
-from searx.utils import searx_useragent
from searx.external_urls import get_external_url
from searx.engines.wikidata import send_wikidata_query, sparql_string_escape, get_thumbnail
+from searx.result_types import Answer
# about
about = {
@@ -37,8 +37,7 @@ search_string = 'search?{query}&polygon_geojson=1&format=jsonv2&addressdetails=1
result_id_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
result_lat_lon_url = 'https://www.openstreetmap.org/?mlat={lat}&mlon={lon}&zoom={zoom}&layers=M'
-route_url = 'https://graphhopper.com/maps/?point={}&point={}&locale=en-US&vehicle=car&weighting=fastest&turn_costs=true&use_miles=false&layer=Omniscale' # pylint: disable=line-too-long
-route_re = re.compile('(?:from )?(.+) to (.+)')
+route_url = 'https://graphhopper.com/maps'
wikidata_image_sparql = """
select ?item ?itemLabel ?image ?sign ?symbol ?website ?wikipediaName
@@ -138,27 +137,25 @@ KEY_RANKS = {k: i for i, k in enumerate(KEY_ORDER)}
def request(query, params):
- """do search-request"""
- params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
- params['route'] = route_re.match(query)
- params['headers']['User-Agent'] = searx_useragent()
- if 'Accept-Language' not in params['headers']:
- params['headers']['Accept-Language'] = 'en'
+ params['url'] = base_url + search_string.format(query=urllib.parse.urlencode({'q': query}))
return params
def response(resp):
- """get response from search-request"""
results = []
- nominatim_json = loads(resp.text)
+
+ nominatim_json = resp.json()
user_language = resp.search_params['language']
- if resp.search_params['route']:
- results.append(
- {
- 'answer': gettext('Get directions'),
- 'url': route_url.format(*resp.search_params['route'].groups()),
- }
+ l = re.findall(r"from\s+(.*)\s+to\s+(.+)", resp.search_params["query"])
+ if not l:
+ l = re.findall(r"\s*(.*)\s+to\s+(.+)", resp.search_params["query"])
+ if l:
+ point1, point2 = [urllib.parse.quote_plus(p) for p in l[0]]
+ Answer(
+ results=results,
+ answer=gettext('Show route in map ..'),
+ url=f"{route_url}/?point={point1}&point={point2}",
)
# simplify the code below: make sure extratags is a dictionary
diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py
index 20f6e41fd..b2f6c3e31 100644
--- a/searx/engines/tineye.py
+++ b/searx/engines/tineye.py
@@ -156,6 +156,7 @@ def parse_tineye_match(match_json):
def response(resp):
"""Parse HTTP response from TinEye."""
+ results = []
# handle the 422 client side errors, and the possible 400 status code error
if resp.status_code in (400, 422):
@@ -182,14 +183,14 @@ def response(resp):
message = ','.join(description)
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
- # results.append({'answer': message})
- logger.error(message)
- return []
+ # from searx.result_types import Answer
+ # Answer(results=results, answer=message)
+ logger.info(message)
+ return results
# Raise for all other responses
resp.raise_for_status()
- results = []
json_data = resp.json()
for match_json in json_data['matches']:
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
index 190707a95..632e3d2e1 100644
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@@ -3,6 +3,10 @@
"""
+import urllib.parse
+
+from searx.result_types import Translations
+
# about
about = {
"website": 'https://mymemory.translated.net/',
@@ -15,8 +19,8 @@ about = {
engine_type = 'online_dictionary'
categories = ['general', 'translate']
-url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
-web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+api_url = "https://api.mymemory.translated.net"
+web_url = "https://mymemory.translated.net"
weight = 100
https_support = True
@@ -24,27 +28,32 @@ api_key = ''
def request(query, params): # pylint: disable=unused-argument
+
+ args = {"q": params["query"], "langpair": f"{params['from_lang'][1]}|{params['to_lang'][1]}"}
if api_key:
- key_form = '&key=' + api_key
- else:
- key_form = ''
- params['url'] = url.format(
- from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query'], key=key_form
- )
+ args["key"] = api_key
+
+ params['url'] = f"{api_url}/get?{urllib.parse.urlencode(args)}"
return params
def response(resp):
- json_resp = resp.json()
- text = json_resp['responseData']['translatedText']
+ results = []
+ data = resp.json()
+
+ args = {
+ "q": resp.search_params["query"],
+ "lang": resp.search_params.get("searxng_locale", "en"), # ui language
+ "sl": resp.search_params['from_lang'][1],
+ "tl": resp.search_params['to_lang'][1],
+ }
- alternatives = [match['translation'] for match in json_resp['matches'] if match['translation'] != text]
- translations = [{'text': translation} for translation in [text] + alternatives]
+ link = f"{web_url}/search.php?{urllib.parse.urlencode(args)}"
+ text = data['responseData']['translatedText']
- result = {
- 'answer': translations[0]['text'],
- 'answer_type': 'translations',
- 'translations': translations,
- }
+ examples = [f"{m['segment']} : {m['translation']}" for m in data['matches'] if m['translation'] != text]
+
+ item = Translations.Item(text=text, examples=examples)
+ Translations(results=results, translations=[item], url=link)
- return [result]
+ return results
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 90b551a33..5df74a08f 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -262,7 +262,7 @@ def request(query, params):
def response(resp): # pylint: disable=too-many-branches
- '''Scrap *results* from the response (see :ref:`engine results`).'''
+    '''Scrape *results* from the response (see :ref:`result types`).'''
if no_result_for_http_status and resp.status_code in no_result_for_http_status:
return []