diff options
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/semantic_scholar.py | 42 |
| -rw-r--r-- | searx/engines/seznam.py | 37 |
2 files changed, 62 insertions, 17 deletions
diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py new file mode 100644 index 000000000..297d0cf71 --- /dev/null +++ b/searx/engines/semantic_scholar.py @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Semantic Scholar (Science) +""" + +from json import dumps, loads + + +search_url = 'https://www.semanticscholar.org/api/1/search' + + +def request(query, params): + params['url'] = search_url + params['method'] = 'POST' + params['headers']['content-type'] = 'application/json' + params['data'] = dumps({ + "queryString": query, + "page": params['pageno'], + "pageSize": 10, + "sort": "relevance", + "useFallbackRankerService": False, + "useFallbackSearchCluster": False, + "getQuerySuggestions": False, + "authors": [], + "coAuthors": [], + "venues": [], + "performTitleMatch": True, + }) + return params + + +def response(resp): + res = loads(resp.text) + results = [] + for result in res['results']: + results.append({ + 'url': result['primaryPaperLink']['url'], + 'title': result['title']['text'], + 'content': result['paperAbstractTruncated'] + }) + + return results diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py index 1df92a845..faceb0550 100644 --- a/searx/engines/seznam.py +++ b/searx/engines/seznam.py @@ -7,7 +7,12 @@ from urllib.parse import urlencode, urlparse from lxml import html from searx.poolrequests import get from searx.exceptions import SearxEngineAccessDeniedException -from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +from searx.utils import ( + extract_text, + eval_xpath_list, + eval_xpath_getindex, + eval_xpath, +) # about about = { @@ -26,7 +31,10 @@ def request(query, params): response_index = get(base_url, headers=params['headers'], raise_for_httperror=True) dom = html.fromstring(response_index.text) - url_params = {'q': query} + url_params = { + 'q': query, + 'oq': query, + } for e in eval_xpath_list(dom, '//input[@type="hidden"]'): name = e.get('name') value = 
e.get('value') @@ -45,20 +53,15 @@ def response(resp): results = [] dom = html.fromstring(resp.content.decode()) - for result_element in eval_xpath_list(dom, '//div[@id="searchpage-root"]//div[@data-dot="results"]/div'): - dot_data = eval_xpath_getindex(result_element, './div/div[@data-dot-data]/@data-dot-data', 0, default=None) - if dot_data is None: - title_element = eval_xpath_getindex(result_element, './/h3/a', 0) - results.append({ - 'url': title_element.get('href'), - 'title': extract_text(title_element), - 'content': extract_text(eval_xpath_getindex(title_element, '../../div[2]', 0)), - }) - elif dot_data == '{"reporter_name":"hint/related/relates"}': - suggestions_element = eval_xpath_getindex(result_element, - './div/div[@data-dot="main-box"]', 0, default=None) - if suggestions_element is not None: - for suggestion in eval_xpath_list(suggestions_element, './/ul/li'): - results.append({'suggestion': extract_text(suggestion)}) + for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'): + result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "Result")]', 0, default=None) + if result_data is None: + continue + title_element = eval_xpath_getindex(result_element, './/h3/a', 0) + results.append({ + 'url': title_element.get('href'), + 'title': extract_text(title_element), + 'content': extract_text(eval_xpath(result_data, './/p[@class="Result-description"]')), + }) return results |