diff options
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/brave.py | 7 | ||||
| -rw-r--r-- | searx/engines/deepl.py | 7 | ||||
| -rw-r--r-- | searx/engines/dictzone.py | 101 | ||||
| -rw-r--r-- | searx/engines/duckduckgo.py | 8 | ||||
| -rw-r--r-- | searx/engines/duckduckgo_definitions.py | 8 | ||||
| -rw-r--r-- | searx/engines/google.py | 8 | ||||
| -rw-r--r-- | searx/engines/libretranslate.py | 25 | ||||
| -rw-r--r-- | searx/engines/lingva.py | 43 | ||||
| -rw-r--r-- | searx/engines/mozhi.py | 29 | ||||
| -rw-r--r-- | searx/engines/openstreetmap.py | 35 | ||||
| -rw-r--r-- | searx/engines/tineye.py | 9 | ||||
| -rw-r--r-- | searx/engines/translated.py | 45 | ||||
| -rw-r--r-- | searx/engines/xpath.py | 2 |
13 files changed, 190 insertions, 137 deletions
diff --git a/searx/engines/brave.py b/searx/engines/brave.py index db1fc7976..584d2d95c 100644 --- a/searx/engines/brave.py +++ b/searx/engines/brave.py @@ -139,6 +139,7 @@ from searx.utils import ( get_embeded_stream_url, ) from searx.enginelib.traits import EngineTraits +from searx.result_types import Answer if TYPE_CHECKING: import logging @@ -274,10 +275,14 @@ def _parse_search(resp): result_list = [] dom = html.fromstring(resp.text) + # I doubt that Brave is still providing the "answer" class / I haven't seen + # answers in brave for a long time. answer_tag = eval_xpath_getindex(dom, '//div[@class="answer"]', 0, default=None) if answer_tag: url = eval_xpath_getindex(dom, '//div[@id="featured_snippet"]/a[@class="result-header"]/@href', 0, default=None) - result_list.append({'answer': extract_text(answer_tag), 'url': url}) + answer = extract_text(answer_tag) + if answer is not None: + Answer(results=result_list, answer=answer, url=url) # xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]' xpath_results = '//div[contains(@class, "snippet ")]' diff --git a/searx/engines/deepl.py b/searx/engines/deepl.py index 484f56ec4..eff746b6f 100644 --- a/searx/engines/deepl.py +++ b/searx/engines/deepl.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Deepl translation engine""" +from searx.result_types import Translations + about = { "website": 'https://deepl.com', "wikidata_id": 'Q43968444', @@ -45,8 +47,7 @@ def response(resp): if not result.get('translations'): return results - translations = [{'text': translation['text']} for translation in result['translations']] - - results.append({'answer': translations[0]['text'], 'answer_type': 'translations', 'translations': translations}) + translations = [Translations.Item(text=t['text']) for t in result['translations']] + Translations(results=results, translations=translations) return results diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index acd682911..7f562c716 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -3,8 +3,12 @@ Dictzone """ +import urllib.parse from lxml import html -from searx.utils import eval_xpath + +from searx.utils import eval_xpath, extract_text +from searx.result_types import Translations +from searx.network import get as http_get # https://github.com/searxng/searxng/issues/762 # about about = { @@ -18,46 +22,83 @@ about = { engine_type = 'online_dictionary' categories = ['general', 'translate'] -url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' +base_url = "https://dictzone.com" weight = 100 - -results_xpath = './/table[@id="r"]/tr' https_support = True def request(query, params): # pylint: disable=unused-argument - params['url'] = url.format(from_lang=params['from_lang'][2], to_lang=params['to_lang'][2], query=params['query']) + from_lang = params["from_lang"][2] # "english" + to_lang = params["to_lang"][2] # "german" + query = params["query"] + + params["url"] = f"{base_url}/{from_lang}-{to_lang}-dictionary/{urllib.parse.quote_plus(query)}" return params +def _clean_up_node(node): + for x in ["./i", "./span", "./button"]: + for n in node.xpath(x): + n.getparent().remove(n) + + def response(resp): + + results = [] + item_list = [] + + if not resp.ok: + return results + dom = html.fromstring(resp.text) - translations = [] - for result in eval_xpath(dom, results_xpath)[1:]: - try: - from_result, to_results_raw = eval_xpath(result, './td') - except: # pylint: disable=bare-except + for result in eval_xpath(dom, ".//table[@id='r']//tr"): + + # each row is an Translations.Item + + td_list = result.xpath("./td") + if len(td_list) != 2: + # ignore header columns "tr/th" continue - to_results = [] - for to_result in eval_xpath(to_results_raw, './p/a'): - t = to_result.text_content() - if t.strip(): - to_results.append(to_result.text_content()) - - translations.append( - { - 'text': f"{from_result.text_content()} - {'; '.join(to_results)}", - } - ) - - if translations: - result = { - 'answer': translations[0]['text'], - 'translations': translations, - 'answer_type': 'translations', - } - - return [result] + col_from, col_to = td_list + _clean_up_node(col_from) + + text = f"{extract_text(col_from)}" + + synonyms = [] + p_list = col_to.xpath(".//p") + + for i, p_item in enumerate(p_list): + + smpl: str = extract_text(p_list[i].xpath("./i[@class='smpl']")) # type: ignore + _clean_up_node(p_item) + p_text: str = extract_text(p_item) # type: ignore + + if smpl: + p_text += " // " + smpl + + if i == 0: + text += f" : {p_text}" + continue + + synonyms.append(p_text) + + item = Translations.Item(text=text, synonyms=synonyms) + item_list.append(item) + + # the "autotranslate" of dictzone is loaded by the JS from URL: + # https://dictzone.com/trans/hello%20world/en_de + + from_lang = resp.search_params["from_lang"][1] # "en" + to_lang = resp.search_params["to_lang"][1] # "de" + query = resp.search_params["query"] + + # works only sometimes? + autotranslate = http_get(f"{base_url}/trans/{query}/{from_lang}_{to_lang}", timeout=1.0) + if autotranslate.ok and autotranslate.text: + item_list.insert(0, Translations.Item(text=autotranslate.text)) + + Translations(results=results, translations=item_list, url=resp.search_params["url"]) + return results diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index d6c5be8f4..ff6727959 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -27,6 +27,7 @@ from searx.network import get # see https://github.com/searxng/searxng/issues/7 from searx import redisdb from searx.enginelib.traits import EngineTraits from searx.exceptions import SearxEngineCaptchaException +from searx.result_types import Answer if TYPE_CHECKING: import logging @@ -398,12 +399,7 @@ def response(resp): ): current_query = resp.search_params["data"].get("q") - results.append( - { - 'answer': zero_click, - 'url': "https://duckduckgo.com/?" + urlencode({"q": current_query}), - } - ) + Answer(results=results, answer=zero_click, url="https://duckduckgo.com/?" + urlencode({"q": current_query})) return results diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 59caed8ce..e1947f4c0 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -21,6 +21,7 @@ from lxml import html from searx.data import WIKIDATA_UNITS from searx.utils import extract_text, html_to_text, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom +from searx.result_types import Answer if TYPE_CHECKING: import logging @@ -99,9 +100,10 @@ def response(resp): # add answer if there is one answer = search_res.get('Answer', '') if answer: - logger.debug('AnswerType="%s" Answer="%s"', search_res.get('AnswerType'), answer) - if search_res.get('AnswerType') not in ['calc', 'ip']: - results.append({'answer': html_to_text(answer), 'url': search_res.get('AbstractURL', '')}) + answer_type = search_res.get('AnswerType') + logger.debug('AnswerType="%s" Answer="%s"', answer_type, answer) + if isinstance(answer, str) and answer_type not in ['calc', 'ip']: + Answer(results=results, answer=html_to_text(answer), url=search_res.get('AbstractURL', '')) # add infobox if 'Definition' in search_res: diff --git a/searx/engines/google.py b/searx/engines/google.py index e322aa41b..d390e6e98 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -25,6 +25,7 @@ from searx.locales import language_tag, region_tag, get_official_locales from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx.exceptions import SearxEngineCaptchaException from searx.enginelib.traits import EngineTraits +from searx.result_types import Answer if TYPE_CHECKING: import logging @@ -331,12 +332,7 @@ def response(resp): for item in answer_list: for bubble in eval_xpath(item, './/div[@class="nnFGuf"]'): bubble.drop_tree() - results.append( - { - 'answer': extract_text(item), - 'url': (eval_xpath(item, '../..//a/@href') + [None])[0], - } - ) + Answer(results=results, answer=extract_text(item), url=(eval_xpath(item, '../..//a/@href') + [None])[0]) # parse results diff --git a/searx/engines/libretranslate.py b/searx/engines/libretranslate.py index d9b9cf2f9..2e6663cb0 100644 --- a/searx/engines/libretranslate.py +++ b/searx/engines/libretranslate.py @@ -2,7 +2,8 @@ """LibreTranslate (Free and Open Source Machine Translation API)""" import random -from json import dumps +import json +from searx.result_types import Translations about = { "website": 'https://libretranslate.com', @@ -16,19 +17,27 @@ about = { engine_type = 'online_dictionary' categories = ['general', 'translate'] -base_url = "https://translate.terraprint.co" -api_key = '' +base_url = "https://libretranslate.com/translate" +api_key = "" def request(_query, params): request_url = random.choice(base_url) if isinstance(base_url, list) else base_url + + if request_url.startswith("https://libretranslate.com") and not api_key: + return None params['url'] = f"{request_url}/translate" - args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query'], 'alternatives': 3} + args = { + 'q': params['query'], + 'source': params['from_lang'][1], + 'target': params['to_lang'][1], + 'alternatives': 3, + } if api_key: args['api_key'] = api_key - params['data'] = dumps(args) + params['data'] = json.dumps(args) params['method'] = 'POST' params['headers'] = {'Content-Type': 'application/json'} params['req_url'] = request_url @@ -41,12 +50,10 @@ def response(resp): json_resp = resp.json() text = json_resp.get('translatedText') - if not text: return results - translations = [{'text': text}] + [{'text': alternative} for alternative in json_resp.get('alternatives', [])] - - results.append({'answer': text, 'answer_type': 'translations', 'translations': translations}) + item = Translations.Item(text=text, examples=json_resp.get('alternatives', [])) + Translations(results=results, translations=[item]) return results diff --git a/searx/engines/lingva.py b/searx/engines/lingva.py index ecebe4587..1cf70f636 100644 --- a/searx/engines/lingva.py +++ b/searx/engines/lingva.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Lingva (alternative Google Translate frontend)""" +from searx.result_types import Translations + about = { "website": 'https://lingva.ml', "wikidata_id": None, @@ -14,13 +16,10 @@ engine_type = 'online_dictionary' categories = ['general', 'translate'] url = "https://lingva.thedaviddelta.com" -search_url = "{url}/api/v1/{from_lang}/{to_lang}/{query}" def request(_query, params): - params['url'] = search_url.format( - url=url, from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query'] - ) + params['url'] = f"{url}/api/v1/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}" return params @@ -45,32 +44,30 @@ def response(resp): for definition in info['definitions']: for translation in definition['list']: data.append( - { - 'text': result['translation'], - 'definitions': [translation['definition']] if translation['definition'] else [], - 'examples': [translation['example']] if translation['example'] else [], - 'synonyms': translation['synonyms'], - } + Translations.Item( + text=result['translation'], + definitions=[translation['definition']] if translation['definition'] else [], + examples=[translation['example']] if translation['example'] else [], + synonyms=translation['synonyms'], + ) ) for translation in info["extraTranslations"]: for word in translation["list"]: data.append( - { - 'text': word['word'], - 'definitions': word['meanings'], - } + Translations.Item( + text=word['word'], + definitions=word['meanings'], + ) ) if not data and result['translation']: - data.append({'text': result['translation']}) - - results.append( - { - 'answer': data[0]['text'], - 'answer_type': 'translations', - 'translations': data, - } - ) + data.append(Translations.Item(text=result['translation'])) + params = resp.search_params + Translations( + results=results, + translations=data, + url=f"{url}/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}", + ) return results diff --git a/searx/engines/mozhi.py b/searx/engines/mozhi.py index a36bfbec8..c337a287c 100644 --- a/searx/engines/mozhi.py +++ b/searx/engines/mozhi.py @@ -3,7 +3,9 @@ import random import re -from urllib.parse import urlencode +import urllib.parse + +from searx.result_types import Translations about = { "website": 'https://codeberg.org/aryak/mozhi', @@ -27,34 +29,33 @@ def request(_query, params): request_url = random.choice(base_url) if isinstance(base_url, list) else base_url args = {'from': params['from_lang'][1], 'to': params['to_lang'][1], 'text': params['query'], 'engine': mozhi_engine} - params['url'] = f"{request_url}/api/translate?{urlencode(args)}" + params['url'] = f"{request_url}/api/translate?{urllib.parse.urlencode(args)}" return params def response(resp): + results = [] translation = resp.json() - data = {'text': translation['translated-text'], 'definitions': [], 'examples': []} + item = Translations.Item(text=translation['translated-text']) if translation['target_transliteration'] and not re.match( re_transliteration_unsupported, translation['target_transliteration'] ): - data['transliteration'] = translation['target_transliteration'] + item.transliteration = translation['target_transliteration'] if translation['word_choices']: for word in translation['word_choices']: if word.get('definition'): - data['definitions'].append(word['definition']) + item.definitions.append(word['definition']) for example in word.get('examples_target', []): - data['examples'].append(re.sub(r"<|>", "", example).lstrip('- ')) - - data['synonyms'] = translation.get('source_synonyms', []) + item.examples.append(re.sub(r"<|>", "", example).lstrip('- ')) - result = { - 'answer': translation['translated-text'], - 'answer_type': 'translations', - 'translations': [data], - } + item.synonyms = translation.get('source_synonyms', []) - return [result] + url = urllib.parse.urlparse(resp.search_params["url"]) + # remove the api path + url = url._replace(path="", fragment="").geturl() + Translations(results=results, translations=[item], url=url) + return results diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 8f3565eda..3b1885522 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -4,16 +4,16 @@ """ import re -from json import loads -from urllib.parse import urlencode +import urllib.parse + from functools import partial from flask_babel import gettext from searx.data import OSM_KEYS_TAGS, CURRENCIES -from searx.utils import searx_useragent from searx.external_urls import get_external_url from searx.engines.wikidata import send_wikidata_query, sparql_string_escape, get_thumbnail +from searx.result_types import Answer # about about = { @@ -37,8 +37,7 @@ search_string = 'search?{query}&polygon_geojson=1&format=jsonv2&addressdetails=1 result_id_url = 'https://openstreetmap.org/{osm_type}/{osm_id}' result_lat_lon_url = 'https://www.openstreetmap.org/?mlat={lat}&mlon={lon}&zoom={zoom}&layers=M' -route_url = 'https://graphhopper.com/maps/?point={}&point={}&locale=en-US&vehicle=car&weighting=fastest&turn_costs=true&use_miles=false&layer=Omniscale' # pylint: disable=line-too-long -route_re = re.compile('(?:from )?(.+) to (.+)') +route_url = 'https://graphhopper.com/maps' wikidata_image_sparql = """ select ?item ?itemLabel ?image ?sign ?symbol ?website ?wikipediaName @@ -138,27 +137,25 @@ KEY_RANKS = {k: i for i, k in enumerate(KEY_ORDER)} def request(query, params): - """do search-request""" - params['url'] = base_url + search_string.format(query=urlencode({'q': query})) - params['route'] = route_re.match(query) - params['headers']['User-Agent'] = searx_useragent() - if 'Accept-Language' not in params['headers']: - params['headers']['Accept-Language'] = 'en' + params['url'] = base_url + search_string.format(query=urllib.parse.urlencode({'q': query})) return params def response(resp): - """get response from search-request""" results = [] - nominatim_json = loads(resp.text) + + nominatim_json = resp.json() user_language = resp.search_params['language'] - if resp.search_params['route']: - results.append( - { - 'answer': gettext('Get directions'), - 'url': route_url.format(*resp.search_params['route'].groups()), - } + l = re.findall(r"from\s+(.*)\s+to\s+(.+)", resp.search_params["query"]) + if not l: + l = re.findall(r"\s*(.*)\s+to\s+(.+)", resp.search_params["query"]) + if l: + point1, point2 = [urllib.parse.quote_plus(p) for p in l[0]] + Answer( + results=results, + answer=gettext('Show route in map ..'), + url=f"{route_url}/?point={point1}&point={point2}", ) # simplify the code below: make sure extratags is a dictionary diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py index 20f6e41fd..b2f6c3e31 100644 --- a/searx/engines/tineye.py +++ b/searx/engines/tineye.py @@ -156,6 +156,7 @@ def parse_tineye_match(match_json): def response(resp): """Parse HTTP response from TinEye.""" + results = [] # handle the 422 client side errors, and the possible 400 status code error if resp.status_code in (400, 422): @@ -182,14 +183,14 @@ def response(resp): message = ','.join(description) # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023 - # results.append({'answer': message}) - logger.error(message) - return [] + # from searx.result_types import Answer + # Answer(results=results, answer=message) + logger.info(message) + return results # Raise for all other responses resp.raise_for_status() - results = [] json_data = resp.json() for match_json in json_data['matches']: diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 190707a95..632e3d2e1 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -3,6 +3,10 @@ """ +import urllib.parse + +from searx.result_types import Translations + # about about = { "website": 'https://mymemory.translated.net/', @@ -15,8 +19,8 @@ about = { engine_type = 'online_dictionary' categories = ['general', 'translate'] -url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' -web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' +api_url = "https://api.mymemory.translated.net" +web_url = "https://mymemory.translated.net" weight = 100 https_support = True @@ -24,27 +28,32 @@ api_key = '' def request(query, params): # pylint: disable=unused-argument + + args = {"q": params["query"], "langpair": f"{params['from_lang'][1]}|{params['to_lang'][1]}"} if api_key: - key_form = '&key=' + api_key - else: - key_form = '' - params['url'] = url.format( - from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query'], key=key_form - ) + args["key"] = api_key + + params['url'] = f"{api_url}/get?{urllib.parse.urlencode(args)}" return params def response(resp): - json_resp = resp.json() - text = json_resp['responseData']['translatedText'] + results = [] + data = resp.json() + + args = { + "q": resp.search_params["query"], + "lang": resp.search_params.get("searxng_locale", "en"), # ui language + "sl": resp.search_params['from_lang'][1], + "tl": resp.search_params['to_lang'][1], + } - alternatives = [match['translation'] for match in json_resp['matches'] if match['translation'] != text] - translations = [{'text': translation} for translation in [text] + alternatives] + link = f"{web_url}/search.php?{urllib.parse.urlencode(args)}" + text = data['responseData']['translatedText'] - result = { - 'answer': translations[0]['text'], - 'answer_type': 'translations', - 'translations': translations, - } + examples = [f"{m['segment']} : {m['translation']}" for m in data['matches'] if m['translation'] != text] + + item = Translations.Item(text=text, examples=examples) + Translations(results=results, translations=[item], url=link) - return [result] + return results diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 90b551a33..5df74a08f 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -262,7 +262,7 @@ def request(query, params): def response(resp): # pylint: disable=too-many-branches - '''Scrap *results* from the response (see :ref:`engine results`).''' + '''Scrap *results* from the response (see :ref:`result types`).''' if no_result_for_http_status and resp.status_code in no_result_for_http_status: return [] |