diff options
| author | Markus Heiser <markus.heiser@darmarit.de> | 2025-01-27 16:43:43 +0100 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2025-01-28 07:07:08 +0100 |
| commit | 36a1ef12399d529f210ceb4f8b28f497fabd0834 (patch) | |
| tree | 4015a316c6505c3e52d43eea7137338331cbd4eb /searx/engines | |
| parent | edfbf1e1183815cea3b723f3b66260bc55679f32 (diff) | |
[refactor] typification of SearXNG / EngineResults
In [1] and [2] we discussed the need for a Result.results property and how we can
avoid unclear code. This patch implements a class for the result-lists of
engines::
searx.result_types.EngineResults
A simple example of its usage in engine development::
from searx.result_types import EngineResults
...
def response(resp) -> EngineResults:
res = EngineResults()
...
res.add( res.types.Answer(answer="lorem ipsum ..", url="https://example.org") )
...
return res
[1] https://github.com/searxng/searxng/pull/4183#pullrequestreview-257400034
[2] https://github.com/searxng/searxng/pull/4183#issuecomment-2614301580
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/brave.py | 22 | ||||
| -rw-r--r-- | searx/engines/deepl.py | 19 | ||||
| -rw-r--r-- | searx/engines/demo_offline.py | 9 | ||||
| -rw-r--r-- | searx/engines/demo_online.py | 16 | ||||
| -rw-r--r-- | searx/engines/dictzone.py | 12 | ||||
| -rw-r--r-- | searx/engines/duckduckgo.py | 19 | ||||
| -rw-r--r-- | searx/engines/duckduckgo_definitions.py | 13 | ||||
| -rw-r--r-- | searx/engines/google.py | 13 | ||||
| -rw-r--r-- | searx/engines/libretranslate.py | 10 | ||||
| -rw-r--r-- | searx/engines/lingva.py | 21 | ||||
| -rw-r--r-- | searx/engines/mozhi.py | 12 | ||||
| -rw-r--r-- | searx/engines/openstreetmap.py | 16 | ||||
| -rw-r--r-- | searx/engines/tineye.py | 9 | ||||
| -rw-r--r-- | searx/engines/translated.py | 10 | ||||
| -rw-r--r-- | searx/engines/xpath.py | 11 |
15 files changed, 121 insertions, 91 deletions
diff --git a/searx/engines/brave.py b/searx/engines/brave.py index 584d2d95c..828f6154e 100644 --- a/searx/engines/brave.py +++ b/searx/engines/brave.py @@ -139,7 +139,7 @@ from searx.utils import ( get_embeded_stream_url, ) from searx.enginelib.traits import EngineTraits -from searx.result_types import Answer +from searx.result_types import EngineResults if TYPE_CHECKING: import logging @@ -249,7 +249,7 @@ def _extract_published_date(published_date_raw): return None -def response(resp): +def response(resp) -> EngineResults: if brave_category in ('search', 'goggles'): return _parse_search(resp) @@ -270,9 +270,9 @@ def response(resp): raise ValueError(f"Unsupported brave category: {brave_category}") -def _parse_search(resp): +def _parse_search(resp) -> EngineResults: + result_list = EngineResults() - result_list = [] dom = html.fromstring(resp.text) # I doubt that Brave is still providing the "answer" class / I haven't seen @@ -282,7 +282,7 @@ def _parse_search(resp): url = eval_xpath_getindex(dom, '//div[@id="featured_snippet"]/a[@class="result-header"]/@href', 0, default=None) answer = extract_text(answer_tag) if answer is not None: - Answer(results=result_list, answer=answer, url=url) + result_list.add(result_list.types.Answer(answer=answer, url=url)) # xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]' xpath_results = '//div[contains(@class, "snippet ")]' @@ -339,8 +339,8 @@ def _parse_search(resp): return result_list -def _parse_news(json_resp): - result_list = [] +def _parse_news(json_resp) -> EngineResults: + result_list = EngineResults() for result in json_resp["results"]: item = { @@ -356,8 +356,8 @@ def _parse_news(json_resp): return result_list -def _parse_images(json_resp): - result_list = [] +def _parse_images(json_resp) -> EngineResults: + result_list = EngineResults() for result in json_resp["results"]: item = { @@ -375,8 +375,8 @@ def _parse_images(json_resp): return result_list -def _parse_videos(json_resp): - result_list 
= [] +def _parse_videos(json_resp) -> EngineResults: + result_list = EngineResults() for result in json_resp["results"]: diff --git a/searx/engines/deepl.py b/searx/engines/deepl.py index eff746b6f..aec17076f 100644 --- a/searx/engines/deepl.py +++ b/searx/engines/deepl.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Deepl translation engine""" -from searx.result_types import Translations +from searx.result_types import EngineResults about = { "website": 'https://deepl.com', @@ -39,15 +39,14 @@ def request(_query, params): return params -def response(resp): - results = [] +def response(resp) -> EngineResults: - result = resp.json() + res = EngineResults() + data = resp.json() + if not data.get('translations'): + return res - if not result.get('translations'): - return results + translations = [res.types.Translations.Item(text=t['text']) for t in data['translations']] + res.add(res.types.Translations(translations=translations)) - translations = [Translations.Item(text=t['text']) for t in result['translations']] - Translations(results=results, translations=translations) - - return results + return res diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py index 97659465a..ffcdb46a9 100644 --- a/searx/engines/demo_offline.py +++ b/searx/engines/demo_offline.py @@ -13,6 +13,7 @@ close to the implementation, its just a simple example. To get in use of this """ import json +from searx.result_types import EngineResults engine_type = 'offline' categories = ['general'] @@ -48,14 +49,14 @@ def init(engine_settings=None): ) -def search(query, request_params): +def search(query, request_params) -> EngineResults: """Query (offline) engine and return results. Assemble the list of results from your local engine. In this demo engine we ignore the 'query' term, usual you would pass the 'query' term to your local engine to filter out the results. 
""" - ret_val = [] + res = EngineResults() result_list = json.loads(_my_offline_engine) @@ -67,6 +68,6 @@ def search(query, request_params): # choose a result template or comment out to use the *default* 'template': 'key-value.html', } - ret_val.append(entry) + res.append(entry) - return ret_val + return res diff --git a/searx/engines/demo_online.py b/searx/engines/demo_online.py index 6accc0033..ee06c3b31 100644 --- a/searx/engines/demo_online.py +++ b/searx/engines/demo_online.py @@ -17,6 +17,7 @@ list in ``settings.yml``: from json import loads from urllib.parse import urlencode +from searx.result_types import EngineResults engine_type = 'online' send_accept_language_header = True @@ -70,21 +71,28 @@ def request(query, params): return params -def response(resp): +def response(resp) -> EngineResults: """Parse out the result items from the response. In this example we parse the response from `api.artic.edu <https://artic.edu>`__ and filter out all images. """ - results = [] + res = EngineResults() json_data = loads(resp.text) + res.add( + res.types.Answer( + answer="this is a dummy answer ..", + url="https://example.org", + ) + ) + for result in json_data['data']: if not result['image_id']: continue - results.append( + res.append( { 'url': 'https://artic.edu/artworks/%(id)s' % result, 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, @@ -95,4 +103,4 @@ def response(resp): } ) - return results + return res diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 7f562c716..83ab28ff2 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -7,7 +7,7 @@ import urllib.parse from lxml import html from searx.utils import eval_xpath, extract_text -from searx.result_types import Translations +from searx.result_types import EngineResults from searx.network import get as http_get # https://github.com/searxng/searxng/issues/762 # about @@ -43,9 +43,9 @@ def _clean_up_node(node): n.getparent().remove(n) -def 
response(resp): +def response(resp) -> EngineResults: + results = EngineResults() - results = [] item_list = [] if not resp.ok: @@ -85,7 +85,7 @@ def response(resp): synonyms.append(p_text) - item = Translations.Item(text=text, synonyms=synonyms) + item = results.types.Translations.Item(text=text, synonyms=synonyms) item_list.append(item) # the "autotranslate" of dictzone is loaded by the JS from URL: @@ -98,7 +98,7 @@ def response(resp): # works only sometimes? autotranslate = http_get(f"{base_url}/trans/{query}/{from_lang}_{to_lang}", timeout=1.0) if autotranslate.ok and autotranslate.text: - item_list.insert(0, Translations.Item(text=autotranslate.text)) + item_list.insert(0, results.types.Translations.Item(text=autotranslate.text)) - Translations(results=results, translations=item_list, url=resp.search_params["url"]) + results.add(results.types.Translations(translations=item_list, url=resp.search_params["url"])) return results diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index ff6727959..a03a94063 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -27,7 +27,7 @@ from searx.network import get # see https://github.com/searxng/searxng/issues/7 from searx import redisdb from searx.enginelib.traits import EngineTraits from searx.exceptions import SearxEngineCaptchaException -from searx.result_types import Answer +from searx.result_types import EngineResults if TYPE_CHECKING: import logging @@ -355,12 +355,12 @@ def is_ddg_captcha(dom): return bool(eval_xpath(dom, "//form[@id='challenge-form']")) -def response(resp): +def response(resp) -> EngineResults: + results = EngineResults() if resp.status_code == 303: - return [] + return results - results = [] doc = lxml.html.fromstring(resp.text) if is_ddg_captcha(doc): @@ -398,8 +398,15 @@ def response(resp): and "URL Decoded:" not in zero_click ): current_query = resp.search_params["data"].get("q") - - Answer(results=results, answer=zero_click, 
url="https://duckduckgo.com/?" + urlencode({"q": current_query})) + results.add( + results.types.Answer( + answer=zero_click, + url="https://duckduckgo.com/?" + + urlencode( + {"q": current_query}, + ), + ) + ) return results diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index e1947f4c0..75021242f 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -21,7 +21,7 @@ from lxml import html from searx.data import WIKIDATA_UNITS from searx.utils import extract_text, html_to_text, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom -from searx.result_types import Answer +from searx.result_types import EngineResults if TYPE_CHECKING: import logging @@ -76,9 +76,9 @@ def request(query, params): return params -def response(resp): +def response(resp) -> EngineResults: # pylint: disable=too-many-locals, too-many-branches, too-many-statements - results = [] + results = EngineResults() search_res = resp.json() @@ -103,7 +103,12 @@ def response(resp): answer_type = search_res.get('AnswerType') logger.debug('AnswerType="%s" Answer="%s"', answer_type, answer) if isinstance(answer, str) and answer_type not in ['calc', 'ip']: - Answer(results=results, answer=html_to_text(answer), url=search_res.get('AbstractURL', '')) + results.add( + results.types.Answer( + answer=html_to_text(answer), + url=search_res.get('AbstractURL', ''), + ) + ) # add infobox if 'Definition' in search_res: diff --git a/searx/engines/google.py b/searx/engines/google.py index d390e6e98..9fd037a4f 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -25,7 +25,7 @@ from searx.locales import language_tag, region_tag, get_official_locales from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx.exceptions import SearxEngineCaptchaException from searx.enginelib.traits import EngineTraits -from 
searx.result_types import Answer +from searx.result_types import EngineResults if TYPE_CHECKING: import logging @@ -316,12 +316,12 @@ def _parse_data_images(dom): return data_image_map -def response(resp): +def response(resp) -> EngineResults: """Get response from google's search request""" # pylint: disable=too-many-branches, too-many-statements detect_google_sorry(resp) - results = [] + results = EngineResults() # convert the text to dom dom = html.fromstring(resp.text) @@ -332,7 +332,12 @@ def response(resp): for item in answer_list: for bubble in eval_xpath(item, './/div[@class="nnFGuf"]'): bubble.drop_tree() - Answer(results=results, answer=extract_text(item), url=(eval_xpath(item, '../..//a/@href') + [None])[0]) + results.add( + results.types.Answer( + answer=extract_text(item), + url=(eval_xpath(item, '../..//a/@href') + [None])[0], + ) + ) # parse results diff --git a/searx/engines/libretranslate.py b/searx/engines/libretranslate.py index 2e6663cb0..b9b8c7165 100644 --- a/searx/engines/libretranslate.py +++ b/searx/engines/libretranslate.py @@ -3,7 +3,7 @@ import random import json -from searx.result_types import Translations +from searx.result_types import EngineResults about = { "website": 'https://libretranslate.com', @@ -45,15 +45,15 @@ def request(_query, params): return params -def response(resp): - results = [] +def response(resp) -> EngineResults: + results = EngineResults() json_resp = resp.json() text = json_resp.get('translatedText') if not text: return results - item = Translations.Item(text=text, examples=json_resp.get('alternatives', [])) - Translations(results=results, translations=[item]) + item = results.types.Translations.Item(text=text, examples=json_resp.get('alternatives', [])) + results.add(results.types.Translations(translations=[item])) return results diff --git a/searx/engines/lingva.py b/searx/engines/lingva.py index 1cf70f636..156e37d89 100644 --- a/searx/engines/lingva.py +++ b/searx/engines/lingva.py @@ -1,7 +1,7 @@ # 
SPDX-License-Identifier: AGPL-3.0-or-later """Lingva (alternative Google Translate frontend)""" -from searx.result_types import Translations +from searx.result_types import EngineResults about = { "website": 'https://lingva.ml', @@ -23,8 +23,8 @@ def request(_query, params): return params -def response(resp): - results = [] +def response(resp) -> EngineResults: + results = EngineResults() result = resp.json() info = result["info"] @@ -44,7 +44,7 @@ def response(resp): for definition in info['definitions']: for translation in definition['list']: data.append( - Translations.Item( + results.types.Translations.Item( text=result['translation'], definitions=[translation['definition']] if translation['definition'] else [], examples=[translation['example']] if translation['example'] else [], @@ -55,19 +55,20 @@ def response(resp): for translation in info["extraTranslations"]: for word in translation["list"]: data.append( - Translations.Item( + results.types.Translations.Item( text=word['word'], definitions=word['meanings'], ) ) if not data and result['translation']: - data.append(Translations.Item(text=result['translation'])) + data.append(results.types.Translations.Item(text=result['translation'])) params = resp.search_params - Translations( - results=results, - translations=data, - url=f"{url}/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}", + results.add( + results.types.Translations( + translations=data, + url=f"{url}/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}", + ) ) return results diff --git a/searx/engines/mozhi.py b/searx/engines/mozhi.py index c337a287c..af9bb344c 100644 --- a/searx/engines/mozhi.py +++ b/searx/engines/mozhi.py @@ -5,7 +5,7 @@ import random import re import urllib.parse -from searx.result_types import Translations +from searx.result_types import EngineResults about = { "website": 'https://codeberg.org/aryak/mozhi', @@ -33,11 +33,11 @@ def request(_query, params): return params -def response(resp): - 
results = [] +def response(resp) -> EngineResults: + res = EngineResults() translation = resp.json() - item = Translations.Item(text=translation['translated-text']) + item = res.types.Translations.Item(text=translation['translated-text']) if translation['target_transliteration'] and not re.match( re_transliteration_unsupported, translation['target_transliteration'] @@ -57,5 +57,5 @@ def response(resp): url = urllib.parse.urlparse(resp.search_params["url"]) # remove the api path url = url._replace(path="", fragment="").geturl() - Translations(results=results, translations=[item], url=url) - return results + res.add(res.types.Translations(translations=[item], url=url)) + return res diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 3b1885522..64ed6809b 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -13,7 +13,7 @@ from flask_babel import gettext from searx.data import OSM_KEYS_TAGS, CURRENCIES from searx.external_urls import get_external_url from searx.engines.wikidata import send_wikidata_query, sparql_string_escape, get_thumbnail -from searx.result_types import Answer +from searx.result_types import EngineResults # about about = { @@ -141,8 +141,8 @@ def request(query, params): return params -def response(resp): - results = [] +def response(resp) -> EngineResults: + results = EngineResults() nominatim_json = resp.json() user_language = resp.search_params['language'] @@ -152,10 +152,12 @@ def response(resp): l = re.findall(r"\s*(.*)\s+to\s+(.+)", resp.search_params["query"]) if l: point1, point2 = [urllib.parse.quote_plus(p) for p in l[0]] - Answer( - results=results, - answer=gettext('Show route in map ..'), - url=f"{route_url}/?point={point1}&point={point2}", + + results.add( + results.types.Answer( + answer=gettext('Show route in map ..'), + url=f"{route_url}/?point={point1}&point={point2}", + ) ) # simplify the code below: make sure extratags is a dictionary diff --git a/searx/engines/tineye.py 
b/searx/engines/tineye.py index b2f6c3e31..e3a7ab470 100644 --- a/searx/engines/tineye.py +++ b/searx/engines/tineye.py @@ -19,6 +19,8 @@ from urllib.parse import urlencode from datetime import datetime from flask_babel import gettext +from searx.result_types import EngineResults + if TYPE_CHECKING: import logging @@ -154,9 +156,9 @@ def parse_tineye_match(match_json): } -def response(resp): +def response(resp) -> EngineResults: """Parse HTTP response from TinEye.""" - results = [] + results = EngineResults() # handle the 422 client side errors, and the possible 400 status code error if resp.status_code in (400, 422): @@ -183,8 +185,7 @@ def response(resp): message = ','.join(description) # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023 - # from searx.result_types import Answer - # Answer(results=results, answer=message) + # results.add(results.types.Answer(answer=message)) logger.info(message) return results diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 632e3d2e1..cffb6eda3 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -5,7 +5,7 @@ import urllib.parse -from searx.result_types import Translations +from searx.result_types import EngineResults # about about = { @@ -37,8 +37,8 @@ def request(query, params): # pylint: disable=unused-argument return params -def response(resp): - results = [] +def response(resp) -> EngineResults: + results = EngineResults() data = resp.json() args = { @@ -53,7 +53,7 @@ def response(resp): examples = [f"{m['segment']} : {m['translation']}" for m in data['matches'] if m['translation'] != text] - item = Translations.Item(text=text, examples=examples) - Translations(results=results, translations=[item], url=link) + item = results.types.Translations.Item(text=text, examples=examples) + results.add(results.types.Translations(translations=[item], url=link)) return results diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 
5df74a08f..db892d392 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -74,6 +74,7 @@ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list from searx.network import raise_for_httperror +from searx.result_types import EngineResults search_url = None """ @@ -261,15 +262,15 @@ def request(query, params): return params -def response(resp): # pylint: disable=too-many-branches - '''Scrap *results* from the response (see :ref:`result types`).''' +def response(resp) -> EngineResults: # pylint: disable=too-many-branches + """Scrap *results* from the response (see :ref:`result types`).""" + results = EngineResults() + if no_result_for_http_status and resp.status_code in no_result_for_http_status: - return [] + return results raise_for_httperror(resp) - results = [] - if not resp.text: return results |