summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2025-01-27 16:43:43 +0100
committerMarkus Heiser <markus.heiser@darmarIT.de>2025-01-28 07:07:08 +0100
commit36a1ef12399d529f210ceb4f8b28f497fabd0834 (patch)
tree4015a316c6505c3e52d43eea7137338331cbd4eb /searx/engines
parentedfbf1e1183815cea3b723f3b66260bc55679f32 (diff)
[refactor] typification of SearXNG / EngineResults
In [1] and [2] we discussed the need for a Result.results property and how we can avoid unclear code. This patch implements a class for the result-lists of engines:: searx.result_types.EngineResults A simple example for the usage in engine development:: from searx.result_types import EngineResults ... def response(resp) -> EngineResults: res = EngineResults() ... res.add( res.types.Answer(answer="lorem ipsum ..", url="https://example.org") ) ... return res [1] https://github.com/searxng/searxng/pull/4183#pullrequestreview-257400034 [2] https://github.com/searxng/searxng/pull/4183#issuecomment-2614301580 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/brave.py22
-rw-r--r--searx/engines/deepl.py19
-rw-r--r--searx/engines/demo_offline.py9
-rw-r--r--searx/engines/demo_online.py16
-rw-r--r--searx/engines/dictzone.py12
-rw-r--r--searx/engines/duckduckgo.py19
-rw-r--r--searx/engines/duckduckgo_definitions.py13
-rw-r--r--searx/engines/google.py13
-rw-r--r--searx/engines/libretranslate.py10
-rw-r--r--searx/engines/lingva.py21
-rw-r--r--searx/engines/mozhi.py12
-rw-r--r--searx/engines/openstreetmap.py16
-rw-r--r--searx/engines/tineye.py9
-rw-r--r--searx/engines/translated.py10
-rw-r--r--searx/engines/xpath.py11
15 files changed, 121 insertions, 91 deletions
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
index 584d2d95c..828f6154e 100644
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -139,7 +139,7 @@ from searx.utils import (
get_embeded_stream_url,
)
from searx.enginelib.traits import EngineTraits
-from searx.result_types import Answer
+from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
@@ -249,7 +249,7 @@ def _extract_published_date(published_date_raw):
return None
-def response(resp):
+def response(resp) -> EngineResults:
if brave_category in ('search', 'goggles'):
return _parse_search(resp)
@@ -270,9 +270,9 @@ def response(resp):
raise ValueError(f"Unsupported brave category: {brave_category}")
-def _parse_search(resp):
+def _parse_search(resp) -> EngineResults:
+ result_list = EngineResults()
- result_list = []
dom = html.fromstring(resp.text)
# I doubt that Brave is still providing the "answer" class / I haven't seen
@@ -282,7 +282,7 @@ def _parse_search(resp):
url = eval_xpath_getindex(dom, '//div[@id="featured_snippet"]/a[@class="result-header"]/@href', 0, default=None)
answer = extract_text(answer_tag)
if answer is not None:
- Answer(results=result_list, answer=answer, url=url)
+ result_list.add(result_list.types.Answer(answer=answer, url=url))
# xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'
xpath_results = '//div[contains(@class, "snippet ")]'
@@ -339,8 +339,8 @@ def _parse_search(resp):
return result_list
-def _parse_news(json_resp):
- result_list = []
+def _parse_news(json_resp) -> EngineResults:
+ result_list = EngineResults()
for result in json_resp["results"]:
item = {
@@ -356,8 +356,8 @@ def _parse_news(json_resp):
return result_list
-def _parse_images(json_resp):
- result_list = []
+def _parse_images(json_resp) -> EngineResults:
+ result_list = EngineResults()
for result in json_resp["results"]:
item = {
@@ -375,8 +375,8 @@ def _parse_images(json_resp):
return result_list
-def _parse_videos(json_resp):
- result_list = []
+def _parse_videos(json_resp) -> EngineResults:
+ result_list = EngineResults()
for result in json_resp["results"]:
diff --git a/searx/engines/deepl.py b/searx/engines/deepl.py
index eff746b6f..aec17076f 100644
--- a/searx/engines/deepl.py
+++ b/searx/engines/deepl.py
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Deepl translation engine"""
-from searx.result_types import Translations
+from searx.result_types import EngineResults
about = {
"website": 'https://deepl.com',
@@ -39,15 +39,14 @@ def request(_query, params):
return params
-def response(resp):
- results = []
+def response(resp) -> EngineResults:
- result = resp.json()
+ res = EngineResults()
+ data = resp.json()
+ if not data.get('translations'):
+ return res
- if not result.get('translations'):
- return results
+ translations = [res.types.Translations.Item(text=t['text']) for t in data['translations']]
+ res.add(res.types.Translations(translations=translations))
- translations = [Translations.Item(text=t['text']) for t in result['translations']]
- Translations(results=results, translations=translations)
-
- return results
+ return res
diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py
index 97659465a..ffcdb46a9 100644
--- a/searx/engines/demo_offline.py
+++ b/searx/engines/demo_offline.py
@@ -13,6 +13,7 @@ close to the implementation, its just a simple example. To get in use of this
"""
import json
+from searx.result_types import EngineResults
engine_type = 'offline'
categories = ['general']
@@ -48,14 +49,14 @@ def init(engine_settings=None):
)
-def search(query, request_params):
+def search(query, request_params) -> EngineResults:
"""Query (offline) engine and return results. Assemble the list of results from
your local engine. In this demo engine we ignore the 'query' term, usual
you would pass the 'query' term to your local engine to filter out the
results.
"""
- ret_val = []
+ res = EngineResults()
result_list = json.loads(_my_offline_engine)
@@ -67,6 +68,6 @@ def search(query, request_params):
# choose a result template or comment out to use the *default*
'template': 'key-value.html',
}
- ret_val.append(entry)
+ res.append(entry)
- return ret_val
+ return res
diff --git a/searx/engines/demo_online.py b/searx/engines/demo_online.py
index 6accc0033..ee06c3b31 100644
--- a/searx/engines/demo_online.py
+++ b/searx/engines/demo_online.py
@@ -17,6 +17,7 @@ list in ``settings.yml``:
from json import loads
from urllib.parse import urlencode
+from searx.result_types import EngineResults
engine_type = 'online'
send_accept_language_header = True
@@ -70,21 +71,28 @@ def request(query, params):
return params
-def response(resp):
+def response(resp) -> EngineResults:
"""Parse out the result items from the response. In this example we parse the
response from `api.artic.edu <https://artic.edu>`__ and filter out all
images.
"""
- results = []
+ res = EngineResults()
json_data = loads(resp.text)
+ res.add(
+ res.types.Answer(
+ answer="this is a dummy answer ..",
+ url="https://example.org",
+ )
+ )
+
for result in json_data['data']:
if not result['image_id']:
continue
- results.append(
+ res.append(
{
'url': 'https://artic.edu/artworks/%(id)s' % result,
'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
@@ -95,4 +103,4 @@ def response(resp):
}
)
- return results
+ return res
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index 7f562c716..83ab28ff2 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -7,7 +7,7 @@ import urllib.parse
from lxml import html
from searx.utils import eval_xpath, extract_text
-from searx.result_types import Translations
+from searx.result_types import EngineResults
from searx.network import get as http_get # https://github.com/searxng/searxng/issues/762
# about
@@ -43,9 +43,9 @@ def _clean_up_node(node):
n.getparent().remove(n)
-def response(resp):
+def response(resp) -> EngineResults:
+ results = EngineResults()
- results = []
item_list = []
if not resp.ok:
@@ -85,7 +85,7 @@ def response(resp):
synonyms.append(p_text)
- item = Translations.Item(text=text, synonyms=synonyms)
+ item = results.types.Translations.Item(text=text, synonyms=synonyms)
item_list.append(item)
# the "autotranslate" of dictzone is loaded by the JS from URL:
@@ -98,7 +98,7 @@ def response(resp):
# works only sometimes?
autotranslate = http_get(f"{base_url}/trans/{query}/{from_lang}_{to_lang}", timeout=1.0)
if autotranslate.ok and autotranslate.text:
- item_list.insert(0, Translations.Item(text=autotranslate.text))
+ item_list.insert(0, results.types.Translations.Item(text=autotranslate.text))
- Translations(results=results, translations=item_list, url=resp.search_params["url"])
+ results.add(results.types.Translations(translations=item_list, url=resp.search_params["url"]))
return results
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index ff6727959..a03a94063 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -27,7 +27,7 @@ from searx.network import get # see https://github.com/searxng/searxng/issues/7
from searx import redisdb
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineCaptchaException
-from searx.result_types import Answer
+from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
@@ -355,12 +355,12 @@ def is_ddg_captcha(dom):
return bool(eval_xpath(dom, "//form[@id='challenge-form']"))
-def response(resp):
+def response(resp) -> EngineResults:
+ results = EngineResults()
if resp.status_code == 303:
- return []
+ return results
- results = []
doc = lxml.html.fromstring(resp.text)
if is_ddg_captcha(doc):
@@ -398,8 +398,15 @@ def response(resp):
and "URL Decoded:" not in zero_click
):
current_query = resp.search_params["data"].get("q")
-
- Answer(results=results, answer=zero_click, url="https://duckduckgo.com/?" + urlencode({"q": current_query}))
+ results.add(
+ results.types.Answer(
+ answer=zero_click,
+ url="https://duckduckgo.com/?"
+ + urlencode(
+ {"q": current_query},
+ ),
+ )
+ )
return results
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index e1947f4c0..75021242f 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -21,7 +21,7 @@ from lxml import html
from searx.data import WIKIDATA_UNITS
from searx.utils import extract_text, html_to_text, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
-from searx.result_types import Answer
+from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
@@ -76,9 +76,9 @@ def request(query, params):
return params
-def response(resp):
+def response(resp) -> EngineResults:
# pylint: disable=too-many-locals, too-many-branches, too-many-statements
- results = []
+ results = EngineResults()
search_res = resp.json()
@@ -103,7 +103,12 @@ def response(resp):
answer_type = search_res.get('AnswerType')
logger.debug('AnswerType="%s" Answer="%s"', answer_type, answer)
if isinstance(answer, str) and answer_type not in ['calc', 'ip']:
- Answer(results=results, answer=html_to_text(answer), url=search_res.get('AbstractURL', ''))
+ results.add(
+ results.types.Answer(
+ answer=html_to_text(answer),
+ url=search_res.get('AbstractURL', ''),
+ )
+ )
# add infobox
if 'Definition' in search_res:
diff --git a/searx/engines/google.py b/searx/engines/google.py
index d390e6e98..9fd037a4f 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -25,7 +25,7 @@ from searx.locales import language_tag, region_tag, get_official_locales
from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx.exceptions import SearxEngineCaptchaException
from searx.enginelib.traits import EngineTraits
-from searx.result_types import Answer
+from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
@@ -316,12 +316,12 @@ def _parse_data_images(dom):
return data_image_map
-def response(resp):
+def response(resp) -> EngineResults:
"""Get response from google's search request"""
# pylint: disable=too-many-branches, too-many-statements
detect_google_sorry(resp)
- results = []
+ results = EngineResults()
# convert the text to dom
dom = html.fromstring(resp.text)
@@ -332,7 +332,12 @@ def response(resp):
for item in answer_list:
for bubble in eval_xpath(item, './/div[@class="nnFGuf"]'):
bubble.drop_tree()
- Answer(results=results, answer=extract_text(item), url=(eval_xpath(item, '../..//a/@href') + [None])[0])
+ results.add(
+ results.types.Answer(
+ answer=extract_text(item),
+ url=(eval_xpath(item, '../..//a/@href') + [None])[0],
+ )
+ )
# parse results
diff --git a/searx/engines/libretranslate.py b/searx/engines/libretranslate.py
index 2e6663cb0..b9b8c7165 100644
--- a/searx/engines/libretranslate.py
+++ b/searx/engines/libretranslate.py
@@ -3,7 +3,7 @@
import random
import json
-from searx.result_types import Translations
+from searx.result_types import EngineResults
about = {
"website": 'https://libretranslate.com',
@@ -45,15 +45,15 @@ def request(_query, params):
return params
-def response(resp):
- results = []
+def response(resp) -> EngineResults:
+ results = EngineResults()
json_resp = resp.json()
text = json_resp.get('translatedText')
if not text:
return results
- item = Translations.Item(text=text, examples=json_resp.get('alternatives', []))
- Translations(results=results, translations=[item])
+ item = results.types.Translations.Item(text=text, examples=json_resp.get('alternatives', []))
+ results.add(results.types.Translations(translations=[item]))
return results
diff --git a/searx/engines/lingva.py b/searx/engines/lingva.py
index 1cf70f636..156e37d89 100644
--- a/searx/engines/lingva.py
+++ b/searx/engines/lingva.py
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Lingva (alternative Google Translate frontend)"""
-from searx.result_types import Translations
+from searx.result_types import EngineResults
about = {
"website": 'https://lingva.ml',
@@ -23,8 +23,8 @@ def request(_query, params):
return params
-def response(resp):
- results = []
+def response(resp) -> EngineResults:
+ results = EngineResults()
result = resp.json()
info = result["info"]
@@ -44,7 +44,7 @@ def response(resp):
for definition in info['definitions']:
for translation in definition['list']:
data.append(
- Translations.Item(
+ results.types.Translations.Item(
text=result['translation'],
definitions=[translation['definition']] if translation['definition'] else [],
examples=[translation['example']] if translation['example'] else [],
@@ -55,19 +55,20 @@ def response(resp):
for translation in info["extraTranslations"]:
for word in translation["list"]:
data.append(
- Translations.Item(
+ results.types.Translations.Item(
text=word['word'],
definitions=word['meanings'],
)
)
if not data and result['translation']:
- data.append(Translations.Item(text=result['translation']))
+ data.append(results.types.Translations.Item(text=result['translation']))
params = resp.search_params
- Translations(
- results=results,
- translations=data,
- url=f"{url}/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}",
+ results.add(
+ results.types.Translations(
+ translations=data,
+ url=f"{url}/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}",
+ )
)
return results
diff --git a/searx/engines/mozhi.py b/searx/engines/mozhi.py
index c337a287c..af9bb344c 100644
--- a/searx/engines/mozhi.py
+++ b/searx/engines/mozhi.py
@@ -5,7 +5,7 @@ import random
import re
import urllib.parse
-from searx.result_types import Translations
+from searx.result_types import EngineResults
about = {
"website": 'https://codeberg.org/aryak/mozhi',
@@ -33,11 +33,11 @@ def request(_query, params):
return params
-def response(resp):
- results = []
+def response(resp) -> EngineResults:
+ res = EngineResults()
translation = resp.json()
- item = Translations.Item(text=translation['translated-text'])
+ item = res.types.Translations.Item(text=translation['translated-text'])
if translation['target_transliteration'] and not re.match(
re_transliteration_unsupported, translation['target_transliteration']
@@ -57,5 +57,5 @@ def response(resp):
url = urllib.parse.urlparse(resp.search_params["url"])
# remove the api path
url = url._replace(path="", fragment="").geturl()
- Translations(results=results, translations=[item], url=url)
- return results
+ res.add(res.types.Translations(translations=[item], url=url))
+ return res
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 3b1885522..64ed6809b 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -13,7 +13,7 @@ from flask_babel import gettext
from searx.data import OSM_KEYS_TAGS, CURRENCIES
from searx.external_urls import get_external_url
from searx.engines.wikidata import send_wikidata_query, sparql_string_escape, get_thumbnail
-from searx.result_types import Answer
+from searx.result_types import EngineResults
# about
about = {
@@ -141,8 +141,8 @@ def request(query, params):
return params
-def response(resp):
- results = []
+def response(resp) -> EngineResults:
+ results = EngineResults()
nominatim_json = resp.json()
user_language = resp.search_params['language']
@@ -152,10 +152,12 @@ def response(resp):
l = re.findall(r"\s*(.*)\s+to\s+(.+)", resp.search_params["query"])
if l:
point1, point2 = [urllib.parse.quote_plus(p) for p in l[0]]
- Answer(
- results=results,
- answer=gettext('Show route in map ..'),
- url=f"{route_url}/?point={point1}&point={point2}",
+
+ results.add(
+ results.types.Answer(
+ answer=gettext('Show route in map ..'),
+ url=f"{route_url}/?point={point1}&point={point2}",
+ )
)
# simplify the code below: make sure extratags is a dictionary
diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py
index b2f6c3e31..e3a7ab470 100644
--- a/searx/engines/tineye.py
+++ b/searx/engines/tineye.py
@@ -19,6 +19,8 @@ from urllib.parse import urlencode
from datetime import datetime
from flask_babel import gettext
+from searx.result_types import EngineResults
+
if TYPE_CHECKING:
import logging
@@ -154,9 +156,9 @@ def parse_tineye_match(match_json):
}
-def response(resp):
+def response(resp) -> EngineResults:
"""Parse HTTP response from TinEye."""
- results = []
+ results = EngineResults()
# handle the 422 client side errors, and the possible 400 status code error
if resp.status_code in (400, 422):
@@ -183,8 +185,7 @@ def response(resp):
message = ','.join(description)
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
- # from searx.result_types import Answer
- # Answer(results=results, answer=message)
+ # results.add(results.types.Answer(answer=message))
logger.info(message)
return results
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
index 632e3d2e1..cffb6eda3 100644
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@@ -5,7 +5,7 @@
import urllib.parse
-from searx.result_types import Translations
+from searx.result_types import EngineResults
# about
about = {
@@ -37,8 +37,8 @@ def request(query, params): # pylint: disable=unused-argument
return params
-def response(resp):
- results = []
+def response(resp) -> EngineResults:
+ results = EngineResults()
data = resp.json()
args = {
@@ -53,7 +53,7 @@ def response(resp):
examples = [f"{m['segment']} : {m['translation']}" for m in data['matches'] if m['translation'] != text]
- item = Translations.Item(text=text, examples=examples)
- Translations(results=results, translations=[item], url=link)
+ item = results.types.Translations.Item(text=text, examples=examples)
+ results.add(results.types.Translations(translations=[item], url=link))
return results
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 5df74a08f..db892d392 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -74,6 +74,7 @@ from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
from searx.network import raise_for_httperror
+from searx.result_types import EngineResults
search_url = None
"""
@@ -261,15 +262,15 @@ def request(query, params):
return params
-def response(resp): # pylint: disable=too-many-branches
- '''Scrap *results* from the response (see :ref:`result types`).'''
+def response(resp) -> EngineResults: # pylint: disable=too-many-branches
+ """Scrap *results* from the response (see :ref:`result types`)."""
+ results = EngineResults()
+
if no_result_for_http_status and resp.status_code in no_result_for_http_status:
- return []
+ return results
raise_for_httperror(resp)
- results = []
-
if not resp.text:
return results