summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexandre Flament <alex@al-f.net> 2021-02-12 10:58:28 +0100
committer: GitHub <noreply@github.com> 2021-02-12 10:58:28 +0100
commit: d76660463b9001137c092d61a2781464f8345316 (patch)
tree: 914c1696ebedf570019d8e52ef054df4a9c7995a
parent: 7dcf67a47afd4a63b62d052b9db51ec8a9c7b40c (diff)
parent: ff84a1af35c04855ae6e5c2463b978111d8c9fb1 (diff)
Merge pull request #2562 from dalf/mod-json-engine
[mod] json_engine: add content_html_to_text and title_html_to_text
-rw-r--r-- searx/engines/json_engine.py 20
-rw-r--r-- searx/settings.yml 4
2 files changed, 19 insertions, 5 deletions
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index f4a5ff6d2..8a04d34b2 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -3,13 +3,15 @@
from collections.abc import Iterable
from json import loads
from urllib.parse import urlencode
-from searx.utils import to_string
+from searx.utils import to_string, html_to_text
search_url = None
url_query = None
content_query = None
title_query = None
+content_html_to_text = False
+title_html_to_text = False
paging = False
suggestion_query = ''
results_query = ''
@@ -92,9 +94,17 @@ def request(query, params):
return params
+def identity(arg):
+ return arg
+
+
def response(resp):
results = []
json = loads(resp.text)
+
+ title_filter = html_to_text if title_html_to_text else identity
+ content_filter = html_to_text if content_html_to_text else identity
+
if results_query:
rs = query(json, results_query)
if not len(rs):
@@ -111,8 +121,8 @@ def response(resp):
content = ""
results.append({
'url': to_string(url),
- 'title': to_string(title),
- 'content': to_string(content),
+ 'title': title_filter(to_string(title)),
+ 'content': content_filter(to_string(content)),
})
else:
for url, title, content in zip(
@@ -122,8 +132,8 @@ def response(resp):
):
results.append({
'url': to_string(url),
- 'title': to_string(title),
- 'content': to_string(content),
+ 'title': title_filter(to_string(title)),
+ 'content': content_filter(to_string(content)),
})
if not suggestion_query:
diff --git a/searx/settings.yml b/searx/settings.yml
index 4e926d73c..32ba504a2 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -267,7 +267,9 @@ engines:
search_url : https://search.crossref.org/dois?q={query}&page={pageno}
url_query : doi
title_query : title
+ title_html_to_text: True
content_query : fullCitation
+ content_html_to_text: True
categories : science
shortcut : cr
about:
@@ -757,6 +759,7 @@ engines:
url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
title_query : metadata/oaf:entity/oaf:result/title/$
content_query : metadata/oaf:entity/oaf:result/description/$
+ content_html_to_text: True
categories : science
shortcut : oad
timeout: 5.0
@@ -776,6 +779,7 @@ engines:
url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
title_query : metadata/oaf:entity/oaf:result/title/$
content_query : metadata/oaf:entity/oaf:result/description/$
+ content_html_to_text: True
categories : science
shortcut : oap
timeout: 5.0