diff options
| author | Hackurei <138650713+Hackurei@users.noreply.github.com> | 2024-02-07 15:18:13 -0700 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-02-18 09:17:22 +0100 |
| commit | 3df53d6e503b97533dda33691462d949aa480dda (patch) | |
| tree | 1a3b492b8ea9d2cb45393403770b186be7bf1878 /searx/engines | |
| parent | c197c0e35e6eaa823d6ba8606df4e5a5c598a07b (diff) | |
[feat] engine: implementation of ask.com
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/ask.py | 56 |
1 files changed, 56 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Ask.com

Scrapes the HTML result page of ask.com.  Results are not read from the DOM;
they are pulled with regular expressions out of the ``"url"``, ``"title"`` and
``"abstract"`` string values found in the text content of the page.
"""

import json
import re
from urllib.parse import urlencode

from lxml import html

# Metadata
about = {
    "website": "https://www.ask.com/",
    "wikidata_id": 'Q847564',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine Configuration
categories = ['general']
paging = True

# Base URL
base_url = "https://www.ask.com/web"


def _value_pattern(key):
    """Compile a pattern capturing the string value of every ``"<key>":"..."`` pair.

    The value may contain backslash escapes (e.g. ``\\"``), so an escaped quote
    does not end the match.  Like the ``.`` wildcard, the value never spans a
    line break.
    """
    return re.compile(r'"' + re.escape(key) + r'":"((?:[^"\\\n]|\\.)*)"')


_URL_RE = _value_pattern("url")
_TITLE_RE = _value_pattern("title")
_ABSTRACT_RE = _value_pattern("abstract")


def _unescape(raw):
    """Decode the JSON escape sequences (``\\"``, ``\\/``, ``\\uXXXX``, ...) in *raw*.

    *raw* is returned unchanged when it holds no backslash or when it can not
    be decoded as the body of a JSON string.
    """
    if "\\" not in raw:
        return raw
    try:
        decoded = json.loads(f'"{raw}"')
        # A lone ``\uD8xx`` escape decodes to an unpaired surrogate that can
        # not be encoded later on; UnicodeEncodeError is a ValueError.
        decoded.encode("utf-8")
    except ValueError:
        return raw
    return decoded


def _find_values(pattern, text):
    """Return the decoded value of every match of *pattern* in *text*."""
    return [_unescape(raw) for raw in pattern.findall(text)]


def request(query, params):
    """Store the search URL for *query* in ``params["url"]`` and return *params*.

    The page number is taken from ``params["pageno"]``.
    """
    query_params = {
        "q": query,
        "page": params["pageno"],
    }

    params["url"] = f"{base_url}?{urlencode(query_params)}"
    return params


def response(resp):
    """Build the result list from the text of the response *resp*.

    Returns a list of dicts with the keys ``url``, ``title`` and ``content``.
    """
    text = html.fromstring(resp.text).text_content()

    urls = _find_values(_URL_RE, text)
    # NOTE(review): the first three "title" values are skipped -- presumably
    # they do not belong to search results; confirm against a live page.
    titles = _find_values(_TITLE_RE, text)[3:]
    abstracts = _find_values(_ABSTRACT_RE, text)

    # zip() pairs the values purely by position and stops at the shortest
    # list, this assumes every result carries all three values.
    return [
        {
            "url": url,
            "title": title,
            "content": content,
        }
        for url, title, content in zip(urls, titles, abstracts)
        # Related searches shouldn't be in the search results: www.ask.com/web&q=related
        if "&qo=relatedSearchNarrow" not in url
    ]