summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/dev/engines/online/sourcehut.rst8
-rw-r--r--searx/engines/sourcehut.py90
-rw-r--r--searx/settings.yml19
3 files changed, 101 insertions, 16 deletions
diff --git a/docs/dev/engines/online/sourcehut.rst b/docs/dev/engines/online/sourcehut.rst
new file mode 100644
index 000000000..1c5ec7aec
--- /dev/null
+++ b/docs/dev/engines/online/sourcehut.rst
@@ -0,0 +1,8 @@
+.. _sourcehut engine:
+
+=========
+Sourcehut
+=========
+
+.. automodule:: searx.engines.sourcehut
+ :members:
diff --git a/searx/engines/sourcehut.py b/searx/engines/sourcehut.py
new file mode 100644
index 000000000..fa3df8b23
--- /dev/null
+++ b/searx/engines/sourcehut.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Engine to search in the collaborative software platform SourceHut_.
+
+.. _SourceHut: https://sourcehut.org/
+
+Configuration
+=============
+
+You can configure the following setting:
+
+- :py:obj:`sourcehut_sort_order`
+
+.. code:: yaml
+
+   - name: sourcehut
+     shortcut: srht
+     engine: sourcehut
+     # sourcehut_sort_order: longest-active
+
+Implementations
+===============
+
+"""
+
+import typing as t
+
+from urllib.parse import urlencode, urljoin
+from lxml import html
+
+from searx.utils import eval_xpath, eval_xpath_list, extract_text, searxng_useragent
+from searx.result_types import EngineResults
+
+if t.TYPE_CHECKING:
+ from searx.extended_types import SXNG_Response
+ from searx.search.processors import OnlineParams
+
+
+# Static metadata about the upstream service: results are scraped from HTML
+# ("use_official_api" is False) and no API key is required.
+about = {
+ "website": "https://sourcehut.org",
+ "wikidata_id": "Q78514485",
+ "official_api_documentation": "https://man.sr.ht/",
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": "HTML",
+}
+
+# NOTE(review): presumably the SearXNG categories this engine is listed under.
+categories = ["it", "repos"]
+# request() forwards params["pageno"] as the ``page`` query argument.
+paging = True
+
+base_url: str = "https://sr.ht/projects"
+"""Browse public projects."""
+
+
+sourcehut_sort_order: str = "recently-updated"
+"""The sort order of the results. Possible values:
+
+- ``recently-updated``
+- ``longest-active``
+"""
+
+
+def request(query: str, params: "OnlineParams") -> None:
+    """Fill *params* with the URL and headers for a SourceHut project search.
+
+    The search term, the requested page number (``params["pageno"]``) and the
+    configured :py:obj:`sourcehut_sort_order` are sent as the ``search``,
+    ``page`` and ``sort`` query arguments of :py:obj:`base_url`.
+    """
+    query_string = urlencode(
+        {
+            "search": query,
+            "page": params["pageno"],
+            "sort": sourcehut_sort_order,
+        }
+    )
+    params["url"] = base_url + "?" + query_string
+
+    # 'go-away' (a FOSS bot detection tool) blocks standard user agents,
+    # so the request is sent with the value of searxng_useragent() instead.
+    params["headers"]["User-Agent"] = searxng_useragent()
+
+
+def response(resp: "SXNG_Response") -> EngineResults:
+    """Parse the HTML of a SourceHut project listing into engine results.
+
+    Every ``event`` entry of the first ``event-list`` container becomes one
+    result that is rendered with the ``packages.html`` template.
+    """
+    res = EngineResults()
+    doc = html.fromstring(resp.text)
+
+    for item in eval_xpath_list(doc, "(//div[@class='event-list'])[1]/div[contains(@class, 'event')]"):
+        # The heading holds two links: a[1] is the owner ("~name"), a[2] the project.
+        href = extract_text(eval_xpath(item, "./h4/a[2]/@href")) or ""
+        owner = extract_text(eval_xpath(item, "./h4/a[1]")) or ""
+        tags = [
+            tag.removeprefix("#") for tag in eval_xpath_list(item, "./div[contains(@class, 'tags')]/a/text()")
+        ]
+        res.add(
+            res.types.LegacyResult(
+                template="packages.html",
+                # Resolve the link the way a browser would, relative to the
+                # listing page.  Plain string concatenation would wrongly nest
+                # a root-relative href such as "/~owner/project" below
+                # "/projects".
+                url=urljoin(base_url, href),
+                title=extract_text(eval_xpath(item, "./h4")),
+                package_name=extract_text(eval_xpath(item, "./h4/a[2]")),
+                content=extract_text(eval_xpath(item, "./p")),
+                maintainer=owner.removeprefix("~"),
+                tags=tags,
+            )
+        )
+    return res
diff --git a/searx/settings.yml b/searx/settings.yml
index 2e418177a..a3cccb5d5 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -2677,23 +2677,10 @@ engines:
- name: sourcehut
shortcut: srht
- engine: xpath
- paging: true
- search_url: https://sr.ht/projects?page={pageno}&search={query}
- results_xpath: (//div[@class="event-list"])[1]/div[@class="event"]
- url_xpath: ./h4/a[2]/@href
- title_xpath: ./h4/a[2]
- content_xpath: ./p
- first_page_num: 1
- categories: [it, repos]
+ engine: sourcehut
+ # https://docs.searxng.org/dev/engines/online/sourcehut.html
+ # sourcehut_sort_order: longest-active
disabled: true
- about:
- website: https://sr.ht
- wikidata_id: Q78514485
- official_api_documentation: https://man.sr.ht/
- use_official_api: false
- require_api_key: false
- results: HTML
- name: goo
shortcut: goo