summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2025-08-21 17:57:58 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2025-09-01 14:51:15 +0200
commit9ac9c8c4f50acd801bfc39107c94d17ee9bee72b (patch)
tree0545c9e9ae37462cfd64b5e85620453ffe2981c8 /searx/engines
parentb8085d27aca35b3c60ef50bf0683018d6a6b51b3 (diff)
[mod] typification of SearXNG: add new result type Code
This patch adds a new result type: Code - Python class: searx/result_types/code.py - Jinja template: searx/templates/simple/result_templates/code.html - CSS (less) client/simple/src/less/result_types/code.less Signed-off-by: Markus Heiser <markus.heiser@darmarIT.de>
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/github_code.py64
-rw-r--r--searx/engines/searchcode_code.py87
2 files changed, 56 insertions, 95 deletions
diff --git a/searx/engines/github_code.py b/searx/engines/github_code.py
index 4bafe9c0d..55060b8de 100644
--- a/searx/engines/github_code.py
+++ b/searx/engines/github_code.py
@@ -68,10 +68,8 @@ code blocks in a single file might be returned from the API).
from __future__ import annotations
import typing as t
-from urllib.parse import urlencode, urlparse
+from urllib.parse import urlencode
-from pygments.lexers import guess_lexer_for_filename
-from pygments.util import ClassNotFound
from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response
from searx.network import raise_for_httperror
@@ -162,26 +160,10 @@ def request(query: str, params: dict[str, t.Any]) -> None:
params['raise_for_httperror'] = False
-def get_code_language_name(filename: str, code_snippet: str) -> str | None:
- """Returns a code language name by pulling information from the filename if
- possible otherwise by scanning the passed code snippet. In case there is any
- parsing error just default to no syntax highlighting."""
- try:
- lexer = guess_lexer_for_filename(filename, _text=code_snippet)
- if lexer is None:
- return None
- code_name_aliases = lexer.aliases
- if len(code_name_aliases) == 0:
- return None
- return code_name_aliases[0]
- except ClassNotFound:
- return None
-
-
def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[int]]:
"""
Iterate over multiple possible matches, for each extract a code fragment.
- GitHub additionally sends context for _word_ highlights; pygments supports
+ Github additionally sends context for _word_ highlights; pygments supports
highlighting lines, as such we calculate which lines to highlight while
traversing the text.
"""
@@ -231,18 +213,18 @@ def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[i
def response(resp: SXNG_Response) -> EngineResults:
- results = EngineResults()
+ res = EngineResults()
if resp.status_code == 422:
# on a invalid search term the status code 422 "Unprocessable Content"
# is returned / e.g. search term is "user: foo" instead "user:foo"
- return results
+ return res
# raise for other errors
raise_for_httperror(resp)
for item in resp.json().get('items', []):
- repo = item['repository']
- text_matches = item['text_matches']
+ repo: dict[str, str] = item['repository'] # pyright: ignore[reportAny]
+ text_matches: list[dict[str, str]] = item['text_matches'] # pyright: ignore[reportAny]
# ensure picking only the code contents in the blob
code_matches = [
match for match in text_matches if match["object_type"] == "FileContent" and match["property"] == "content"
@@ -251,22 +233,18 @@ def response(resp: SXNG_Response) -> EngineResults:
if not ghc_highlight_matching_lines:
highlighted_lines_index: set[int] = set()
- code_snippet = "\n".join(lines)
-
- kwargs: dict[str, t.Any] = {
- 'template': 'code.html',
- 'url': item['html_url'],
- 'title': f"{repo['full_name']} · {item['path']}",
- 'content': repo['description'],
- 'repository': repo['html_url'],
- 'codelines': [(i + 1, line) for (i, line) in enumerate(lines)],
- 'hl_lines': highlighted_lines_index,
- 'code_language': get_code_language_name(filename=item['name'], code_snippet=code_snippet),
- # important to set for highlighing
- 'strip_whitespace': ghc_strip_whitespace,
- 'strip_new_lines': ghc_strip_new_lines,
- 'parsed_url': urlparse(item['html_url']),
- }
- results.add(results.types.LegacyResult(**kwargs))
-
- return results
+ res.add(
+ res.types.Code(
+ url=item["html_url"], # pyright: ignore[reportAny]
+ title=f"{repo['full_name']} · {item['name']}",
+ filename=f"{item['path']}",
+ content=repo['description'],
+ repository=repo['html_url'],
+ codelines=[(i + 1, line) for (i, line) in enumerate(lines)],
+ hl_lines=highlighted_lines_index,
+ strip_whitespace=ghc_strip_whitespace,
+ strip_new_lines=ghc_strip_new_lines,
+ )
+ )
+
+ return res
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index 2196b0ad2..c0a6550a0 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -1,79 +1,62 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""Searchcode (IT)
+"""Searchcode (IT)"""
-"""
+from __future__ import annotations
+
+import typing as t
-from json import loads
from urllib.parse import urlencode
+from searx.result_types import EngineResults
+from searx.extended_types import SXNG_Response
+
# about
about = {
- "website": 'https://searchcode.com/',
+ "website": "https://searchcode.com/",
"wikidata_id": None,
- "official_api_documentation": 'https://searchcode.com/api/',
+ "official_api_documentation": "https://searchcode.com/api/",
"use_official_api": True,
"require_api_key": False,
- "results": 'JSON',
+ "results": "JSON",
}
# engine dependent config
-categories = ['it']
-search_api = 'https://searchcode.com/api/codesearch_I/?'
-
-# special code-endings which are not recognised by the file ending
-code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'}
+categories = ["it"]
+search_api = "https://searchcode.com/api/codesearch_I/?"
# paging is broken in searchcode.com's API .. not sure it will ever been fixed
# paging = True
-def request(query, params):
- args = urlencode(
- {
- 'q': query,
- # paging is broken in searchcode.com's API
- # 'p': params['pageno'] - 1,
- # 'per_page': 10,
- }
- )
- params['url'] = search_api + args
- logger.debug("query_url --> %s", params['url'])
- return params
+def request(query: str, params: dict[str, t.Any]) -> None:
+ args = {
+ "q": query,
+ # paging is broken in searchcode.com's API
+ # "p": params["pageno"] - 1,
+ # "per_page": 10,
+ }
+ params["url"] = search_api + urlencode(args)
+ logger.debug("query_url --> %s", params["url"])
-def response(resp):
- results = []
- search_results = loads(resp.text)
+def response(resp: SXNG_Response) -> EngineResults:
+ res = EngineResults()
# parse results
- for result in search_results.get('results', []):
- href = result['url']
- title = "" + result['name'] + " - " + result['filename']
- repo = result['repo']
-
+ for result in resp.json().get("results", []):
lines = {}
- for line, code in result['lines'].items():
+ for line, code in result["lines"].items():
lines[int(line)] = code
- code_language = code_endings.get(
- result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower()
- )
-
- # append result
- results.append(
- {
- 'url': href,
- 'title': title,
- 'content': '',
- 'repository': repo,
- 'codelines': sorted(lines.items()),
- 'code_language': code_language,
- 'template': 'code.html',
- 'strip_whitespace': True,
- 'strip_new_lines': True,
- }
+ res.add(
+ res.types.Code(
+ url=result["url"],
+ title=f'{result["name"]} - {result["filename"]}',
+ repository=result["repo"],
+ filename=result["filename"],
+ codelines=sorted(lines.items()),
+ strip_whitespace=True,
+ )
)
- # return results
- return results
+ return res