diff options
Diffstat (limited to 'searx/engines/sogou.py')
| -rw-r--r-- | searx/engines/sogou.py | 68 |
1 files changed, 68 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Sogou search engine for searxng"""

from urllib.parse import urlencode
from lxml import html

from searx.utils import extract_text

# Engine metadata
about = {
    "website": "https://www.sogou.com/",
    "wikidata_id": "Q7554565",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine configuration
categories = ["general"]
paging = True
max_page = 10
time_range_support = True

# Maps a time-range name to the value sent in the ``s_from`` query parameter.
time_range_dict = {
    "day": "inttime_day",
    "week": "inttime_week",
    "month": "inttime_month",
    "year": "inttime_year",
}

# Root of the site; used for the search URL and to absolutize redirect links.
base_url = "https://www.sogou.com"


def request(query, params):
    """Fill ``params["url"]`` with the Sogou web-search URL for *query*.

    Reads ``params["pageno"]`` and ``params["time_range"]``.  When the time
    range has an entry in ``time_range_dict``, the mapped value is sent as
    ``s_from`` together with ``tsn=1``.

    Returns the same ``params`` mapping, mutated in place.
    """
    args = {
        "query": query,
        "page": params["pageno"],
    }

    # Unknown or unset ranges yield ``None`` and add no filter arguments.
    time_filter = time_range_dict.get(params["time_range"])
    if time_filter:
        args["s_from"] = time_filter
        args["tsn"] = 1

    params["url"] = base_url + "/web?" + urlencode(args)
    return params


def response(resp):
    """Parse the HTML in ``resp.text`` into a list of result dicts.

    Each ``div`` whose class contains ``vrwrap`` is inspected; it yields a
    ``{"title", "url", "content"}`` dict only when both a title and a URL
    were extracted.
    """
    title_link_xpath = './/h3[contains(@class, "vr-title")]/a'
    href_xpath = './/h3[contains(@class, "vr-title")]/a/@href'
    primary_snippet_xpath = './/div[contains(@class, "text-layout")]//p[contains(@class, "star-wiki")]'
    fallback_snippet_xpath = './/div[contains(@class, "fz-mid space-txt")]'

    tree = html.fromstring(resp.text)
    results = []

    for node in tree.xpath('//div[contains(@class, "vrwrap")]'):
        title = extract_text(node.xpath(title_link_xpath))
        url = extract_text(node.xpath(href_xpath))

        # Site-relative redirect links are made absolute.
        if url.startswith("/link?url="):
            url = base_url + url

        # Use the secondary snippet container when the primary one is empty.
        content = extract_text(node.xpath(primary_snippet_xpath)) or extract_text(
            node.xpath(fallback_snippet_xpath)
        )

        if not (title and url):
            continue

        results.append({"title": title, "url": url, "content": content})

    return results