diff options
| author | Markus Heiser <markus.heiser@darmarit.de> | 2025-08-21 17:57:58 +0200 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarIT.de> | 2025-09-01 14:51:15 +0200 |
| commit | 9ac9c8c4f50acd801bfc39107c94d17ee9bee72b (patch) | |
| tree | 0545c9e9ae37462cfd64b5e85620453ffe2981c8 /searx/engines | |
| parent | b8085d27aca35b3c60ef50bf0683018d6a6b51b3 (diff) | |
[mod] typification of SearXNG: add new result type Code
This patch adds a new result type: Code
- Python class: searx/result_types/code.py
- Jinja template: searx/templates/simple/result_templates/code.html
- CSS (less) client/simple/src/less/result_types/code.less
Signed-off-by: Markus Heiser <markus.heiser@darmarIT.de>
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/github_code.py | 64 | ||||
| -rw-r--r-- | searx/engines/searchcode_code.py | 87 |
2 files changed, 56 insertions, 95 deletions
diff --git a/searx/engines/github_code.py b/searx/engines/github_code.py index 4bafe9c0d..55060b8de 100644 --- a/searx/engines/github_code.py +++ b/searx/engines/github_code.py @@ -68,10 +68,8 @@ code blocks in a single file might be returned from the API). from __future__ import annotations import typing as t -from urllib.parse import urlencode, urlparse +from urllib.parse import urlencode -from pygments.lexers import guess_lexer_for_filename -from pygments.util import ClassNotFound from searx.result_types import EngineResults from searx.extended_types import SXNG_Response from searx.network import raise_for_httperror @@ -162,26 +160,10 @@ def request(query: str, params: dict[str, t.Any]) -> None: params['raise_for_httperror'] = False -def get_code_language_name(filename: str, code_snippet: str) -> str | None: - """Returns a code language name by pulling information from the filename if - possible otherwise by scanning the passed code snippet. In case there is any - parsing error just default to no syntax highlighting.""" - try: - lexer = guess_lexer_for_filename(filename, _text=code_snippet) - if lexer is None: - return None - code_name_aliases = lexer.aliases - if len(code_name_aliases) == 0: - return None - return code_name_aliases[0] - except ClassNotFound: - return None - - def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[int]]: """ Iterate over multiple possible matches, for each extract a code fragment. - GitHub additionally sends context for _word_ highlights; pygments supports + Github additionally sends context for _word_ highlights; pygments supports highlighting lines, as such we calculate which lines to highlight while traversing the text. 
""" @@ -231,18 +213,18 @@ def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[i def response(resp: SXNG_Response) -> EngineResults: - results = EngineResults() + res = EngineResults() if resp.status_code == 422: # on a invalid search term the status code 422 "Unprocessable Content" # is returned / e.g. search term is "user: foo" instead "user:foo" - return results + return res # raise for other errors raise_for_httperror(resp) for item in resp.json().get('items', []): - repo = item['repository'] - text_matches = item['text_matches'] + repo: dict[str, str] = item['repository'] # pyright: ignore[reportAny] + text_matches: list[dict[str, str]] = item['text_matches'] # pyright: ignore[reportAny] # ensure picking only the code contents in the blob code_matches = [ match for match in text_matches if match["object_type"] == "FileContent" and match["property"] == "content" @@ -251,22 +233,18 @@ def response(resp: SXNG_Response) -> EngineResults: if not ghc_highlight_matching_lines: highlighted_lines_index: set[int] = set() - code_snippet = "\n".join(lines) - - kwargs: dict[str, t.Any] = { - 'template': 'code.html', - 'url': item['html_url'], - 'title': f"{repo['full_name']} · {item['path']}", - 'content': repo['description'], - 'repository': repo['html_url'], - 'codelines': [(i + 1, line) for (i, line) in enumerate(lines)], - 'hl_lines': highlighted_lines_index, - 'code_language': get_code_language_name(filename=item['name'], code_snippet=code_snippet), - # important to set for highlighing - 'strip_whitespace': ghc_strip_whitespace, - 'strip_new_lines': ghc_strip_new_lines, - 'parsed_url': urlparse(item['html_url']), - } - results.add(results.types.LegacyResult(**kwargs)) - - return results + res.add( + res.types.Code( + url=item["html_url"], # pyright: ignore[reportAny] + title=f"{repo['full_name']} · {item['name']}", + filename=f"{item['path']}", + content=repo['description'], + repository=repo['html_url'], + codelines=[(i + 1, line) for (i, 
line) in enumerate(lines)], + hl_lines=highlighted_lines_index, + strip_whitespace=ghc_strip_whitespace, + strip_new_lines=ghc_strip_new_lines, + ) + ) + + return res diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 2196b0ad2..c0a6550a0 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -1,79 +1,62 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -"""Searchcode (IT) +"""Searchcode (IT)""" -""" +from __future__ import annotations + +import typing as t -from json import loads from urllib.parse import urlencode +from searx.result_types import EngineResults +from searx.extended_types import SXNG_Response + # about about = { - "website": 'https://searchcode.com/', + "website": "https://searchcode.com/", "wikidata_id": None, - "official_api_documentation": 'https://searchcode.com/api/', + "official_api_documentation": "https://searchcode.com/api/", "use_official_api": True, "require_api_key": False, - "results": 'JSON', + "results": "JSON", } # engine dependent config -categories = ['it'] -search_api = 'https://searchcode.com/api/codesearch_I/?' - -# special code-endings which are not recognised by the file ending -code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'} +categories = ["it"] +search_api = "https://searchcode.com/api/codesearch_I/?" # paging is broken in searchcode.com's API .. 
not sure it will ever been fixed # paging = True -def request(query, params): - args = urlencode( - { - 'q': query, - # paging is broken in searchcode.com's API - # 'p': params['pageno'] - 1, - # 'per_page': 10, - } - ) - params['url'] = search_api + args - logger.debug("query_url --> %s", params['url']) - return params +def request(query: str, params: dict[str, t.Any]) -> None: + args = { + "q": query, + # paging is broken in searchcode.com's API + # "p": params["pageno"] - 1, + # "per_page": 10, + } + params["url"] = search_api + urlencode(args) + logger.debug("query_url --> %s", params["url"]) -def response(resp): - results = [] - search_results = loads(resp.text) +def response(resp: SXNG_Response) -> EngineResults: + res = EngineResults() # parse results - for result in search_results.get('results', []): - href = result['url'] - title = "" + result['name'] + " - " + result['filename'] - repo = result['repo'] - + for result in resp.json().get("results", []): lines = {} - for line, code in result['lines'].items(): + for line, code in result["lines"].items(): lines[int(line)] = code - code_language = code_endings.get( - result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower() - ) - - # append result - results.append( - { - 'url': href, - 'title': title, - 'content': '', - 'repository': repo, - 'codelines': sorted(lines.items()), - 'code_language': code_language, - 'template': 'code.html', - 'strip_whitespace': True, - 'strip_new_lines': True, - } + res.add( + res.types.Code( + url=result["url"], + title=f'{result["name"]} - {result["filename"]}', + repository=result["repo"], + filename=result["filename"], + codelines=sorted(lines.items()), + strip_whitespace=True, + ) ) - # return results - return results + return res |