diff options
| author | Alexandre Flament <alex@al-f.net> | 2021-02-12 10:58:28 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-02-12 10:58:28 +0100 |
| commit | d76660463b9001137c092d61a2781464f8345316 (patch) | |
| tree | 914c1696ebedf570019d8e52ef054df4a9c7995a | |
| parent | 7dcf67a47afd4a63b62d052b9db51ec8a9c7b40c (diff) | |
| parent | ff84a1af35c04855ae6e5c2463b978111d8c9fb1 (diff) | |
Merge pull request #2562 from dalf/mod-json-engine
[mod] json_engine: add content_html_to_text and title_html_to_text
| -rw-r--r-- | searx/engines/json_engine.py | 20 |
| -rw-r--r-- | searx/settings.yml | 4 |
2 files changed, 19 insertions, 5 deletions
```diff
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index f4a5ff6d2..8a04d34b2 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -3,13 +3,15 @@
 from collections.abc import Iterable
 from json import loads
 from urllib.parse import urlencode
-from searx.utils import to_string
+from searx.utils import to_string, html_to_text
 
 
 search_url = None
 url_query = None
 content_query = None
 title_query = None
+content_html_to_text = False
+title_html_to_text = False
 paging = False
 suggestion_query = ''
 results_query = ''
@@ -92,9 +94,17 @@ def request(query, params):
     return params
 
 
+def identity(arg):
+    return arg
+
+
 def response(resp):
     results = []
     json = loads(resp.text)
+
+    title_filter = html_to_text if title_html_to_text else identity
+    content_filter = html_to_text if content_html_to_text else identity
+
     if results_query:
         rs = query(json, results_query)
         if not len(rs):
@@ -111,8 +121,8 @@ def response(resp):
                 content = ""
             results.append({
                 'url': to_string(url),
-                'title': to_string(title),
-                'content': to_string(content),
+                'title': title_filter(to_string(title)),
+                'content': content_filter(to_string(content)),
             })
     else:
         for url, title, content in zip(
@@ -122,8 +132,8 @@ def response(resp):
         ):
             results.append({
                 'url': to_string(url),
-                'title': to_string(title),
-                'content': to_string(content),
+                'title': title_filter(to_string(title)),
+                'content': content_filter(to_string(content)),
             })
 
     if not suggestion_query:
diff --git a/searx/settings.yml b/searx/settings.yml
index 4e926d73c..32ba504a2 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -267,7 +267,9 @@ engines:
     search_url : https://search.crossref.org/dois?q={query}&page={pageno}
     url_query : doi
     title_query : title
+    title_html_to_text: True
     content_query : fullCitation
+    content_html_to_text: True
     categories : science
     shortcut : cr
     about:
@@ -757,6 +759,7 @@ engines:
     url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
     title_query : metadata/oaf:entity/oaf:result/title/$
     content_query : metadata/oaf:entity/oaf:result/description/$
+    content_html_to_text: True
     categories : science
     shortcut : oad
     timeout: 5.0
@@ -776,6 +779,7 @@ engines:
     url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
     title_query : metadata/oaf:entity/oaf:result/title/$
     content_query : metadata/oaf:entity/oaf:result/description/$
+    content_html_to_text: True
     categories : science
     shortcut : oap
     timeout: 5.0
```