summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/conf.py1
-rw-r--r--docs/dev/result_types/main/code.rst7
-rw-r--r--docs/dev/result_types/main_result.rst2
-rw-r--r--docs/dev/templates.rst27
-rw-r--r--searx/engines/github_code.py64
-rw-r--r--searx/engines/searchcode_code.py87
-rw-r--r--searx/result_types/__init__.py29
-rw-r--r--searx/result_types/code.py185
-rw-r--r--searx/templates/simple/result_templates/code.html26
-rw-r--r--tests/unit/test_engine_github_code.py43
10 files changed, 307 insertions, 164 deletions
diff --git a/docs/conf.py b/docs/conf.py
index a7221e48b..2d730f58c 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -150,6 +150,7 @@ intersphinx_mapping = {
"linuxdoc" : ("https://return42.github.io/linuxdoc/", None),
"sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
"valkey": ('https://valkey-py.readthedocs.io/en/stable/', None),
+ "pygments": ("https://pygments.org/", None),
}
issues_github_path = "searxng/searxng"
diff --git a/docs/dev/result_types/main/code.rst b/docs/dev/result_types/main/code.rst
new file mode 100644
index 000000000..399cbd26e
--- /dev/null
+++ b/docs/dev/result_types/main/code.rst
@@ -0,0 +1,7 @@
+.. _result_types.code:
+
+============
+Code Results
+============
+
+.. automodule:: searx.result_types.code
diff --git a/docs/dev/result_types/main_result.rst b/docs/dev/result_types/main_result.rst
index a76ed5e88..f072ea757 100644
--- a/docs/dev/result_types/main_result.rst
+++ b/docs/dev/result_types/main_result.rst
@@ -15,6 +15,7 @@ following types have been implemented so far ..
main/mainresult
main/keyvalue
+ main/code
The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
have not yet been typed. The templates can be used as orientation until the
@@ -27,6 +28,5 @@ final typing is complete.
- :ref:`template map`
- :ref:`template paper`
- :ref:`template packages`
-- :ref:`template code`
- :ref:`template files`
- :ref:`template products`
diff --git a/docs/dev/templates.rst b/docs/dev/templates.rst
index 3633eb2ef..e2fa879c8 100644
--- a/docs/dev/templates.rst
+++ b/docs/dev/templates.rst
@@ -469,33 +469,6 @@ links : :py:class:`dict`
Additional links in the form of ``{'link_name': 'http://example.com'}``
-.. _template code:
-
-``code.html``
--------------
-
-Displays result fields from:
-
-- :ref:`macro result_header` and
-- :ref:`macro result_sub_header`
-
-Additional fields used in the :origin:`code.html
-<searx/templates/simple/result_templates/code.html>`:
-
-content : :py:class:`str`
- Description of the code fragment.
-
-codelines : ``[line1, line2, ...]``
- Lines of the code fragment.
-
-code_language : :py:class:`str`
- Name of the code language, the value is passed to
- :py:obj:`pygments.lexers.get_lexer_by_name`.
-
-repository : :py:class:`str`
- URL of the repository of the code fragment.
-
-
.. _template files:
``files.html``
diff --git a/searx/engines/github_code.py b/searx/engines/github_code.py
index 4bafe9c0d..55060b8de 100644
--- a/searx/engines/github_code.py
+++ b/searx/engines/github_code.py
@@ -68,10 +68,8 @@ code blocks in a single file might be returned from the API).
from __future__ import annotations
import typing as t
-from urllib.parse import urlencode, urlparse
+from urllib.parse import urlencode
-from pygments.lexers import guess_lexer_for_filename
-from pygments.util import ClassNotFound
from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response
from searx.network import raise_for_httperror
@@ -162,26 +160,10 @@ def request(query: str, params: dict[str, t.Any]) -> None:
params['raise_for_httperror'] = False
-def get_code_language_name(filename: str, code_snippet: str) -> str | None:
- """Returns a code language name by pulling information from the filename if
- possible otherwise by scanning the passed code snippet. In case there is any
- parsing error just default to no syntax highlighting."""
- try:
- lexer = guess_lexer_for_filename(filename, _text=code_snippet)
- if lexer is None:
- return None
- code_name_aliases = lexer.aliases
- if len(code_name_aliases) == 0:
- return None
- return code_name_aliases[0]
- except ClassNotFound:
- return None
-
-
def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[int]]:
"""
Iterate over multiple possible matches, for each extract a code fragment.
- GitHub additionally sends context for _word_ highlights; pygments supports
+ Github additionally sends context for _word_ highlights; pygments supports
highlighting lines, as such we calculate which lines to highlight while
traversing the text.
"""
@@ -231,18 +213,18 @@ def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[i
def response(resp: SXNG_Response) -> EngineResults:
- results = EngineResults()
+ res = EngineResults()
if resp.status_code == 422:
# on a invalid search term the status code 422 "Unprocessable Content"
# is returned / e.g. search term is "user: foo" instead "user:foo"
- return results
+ return res
# raise for other errors
raise_for_httperror(resp)
for item in resp.json().get('items', []):
- repo = item['repository']
- text_matches = item['text_matches']
+ repo: dict[str, str] = item['repository'] # pyright: ignore[reportAny]
+ text_matches: list[dict[str, str]] = item['text_matches'] # pyright: ignore[reportAny]
# ensure picking only the code contents in the blob
code_matches = [
match for match in text_matches if match["object_type"] == "FileContent" and match["property"] == "content"
@@ -251,22 +233,18 @@ def response(resp: SXNG_Response) -> EngineResults:
if not ghc_highlight_matching_lines:
highlighted_lines_index: set[int] = set()
- code_snippet = "\n".join(lines)
-
- kwargs: dict[str, t.Any] = {
- 'template': 'code.html',
- 'url': item['html_url'],
- 'title': f"{repo['full_name']} · {item['path']}",
- 'content': repo['description'],
- 'repository': repo['html_url'],
- 'codelines': [(i + 1, line) for (i, line) in enumerate(lines)],
- 'hl_lines': highlighted_lines_index,
- 'code_language': get_code_language_name(filename=item['name'], code_snippet=code_snippet),
- # important to set for highlighing
- 'strip_whitespace': ghc_strip_whitespace,
- 'strip_new_lines': ghc_strip_new_lines,
- 'parsed_url': urlparse(item['html_url']),
- }
- results.add(results.types.LegacyResult(**kwargs))
-
- return results
+ res.add(
+ res.types.Code(
+ url=item["html_url"], # pyright: ignore[reportAny]
+ title=f"{repo['full_name']} · {item['name']}",
+ filename=f"{item['path']}",
+ content=repo['description'],
+ repository=repo['html_url'],
+ codelines=[(i + 1, line) for (i, line) in enumerate(lines)],
+ hl_lines=highlighted_lines_index,
+ strip_whitespace=ghc_strip_whitespace,
+ strip_new_lines=ghc_strip_new_lines,
+ )
+ )
+
+ return res
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index 2196b0ad2..c0a6550a0 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -1,79 +1,62 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""Searchcode (IT)
+"""Searchcode (IT)"""
-"""
+from __future__ import annotations
+
+import typing as t
-from json import loads
from urllib.parse import urlencode
+from searx.result_types import EngineResults
+from searx.extended_types import SXNG_Response
+
# about
about = {
- "website": 'https://searchcode.com/',
+ "website": "https://searchcode.com/",
"wikidata_id": None,
- "official_api_documentation": 'https://searchcode.com/api/',
+ "official_api_documentation": "https://searchcode.com/api/",
"use_official_api": True,
"require_api_key": False,
- "results": 'JSON',
+ "results": "JSON",
}
# engine dependent config
-categories = ['it']
-search_api = 'https://searchcode.com/api/codesearch_I/?'
-
-# special code-endings which are not recognised by the file ending
-code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'}
+categories = ["it"]
+search_api = "https://searchcode.com/api/codesearch_I/?"
# paging is broken in searchcode.com's API .. not sure it will ever been fixed
# paging = True
-def request(query, params):
- args = urlencode(
- {
- 'q': query,
- # paging is broken in searchcode.com's API
- # 'p': params['pageno'] - 1,
- # 'per_page': 10,
- }
- )
- params['url'] = search_api + args
- logger.debug("query_url --> %s", params['url'])
- return params
+def request(query: str, params: dict[str, t.Any]) -> None:
+ args = {
+ "q": query,
+ # paging is broken in searchcode.com's API
+ # "p": params["pageno"] - 1,
+ # "per_page": 10,
+ }
+ params["url"] = search_api + urlencode(args)
+ logger.debug("query_url --> %s", params["url"])
-def response(resp):
- results = []
- search_results = loads(resp.text)
+def response(resp: SXNG_Response) -> EngineResults:
+ res = EngineResults()
# parse results
- for result in search_results.get('results', []):
- href = result['url']
- title = "" + result['name'] + " - " + result['filename']
- repo = result['repo']
-
+ for result in resp.json().get("results", []):
lines = {}
- for line, code in result['lines'].items():
+ for line, code in result["lines"].items():
lines[int(line)] = code
- code_language = code_endings.get(
- result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower()
- )
-
- # append result
- results.append(
- {
- 'url': href,
- 'title': title,
- 'content': '',
- 'repository': repo,
- 'codelines': sorted(lines.items()),
- 'code_language': code_language,
- 'template': 'code.html',
- 'strip_whitespace': True,
- 'strip_new_lines': True,
- }
+ res.add(
+ res.types.Code(
+ url=result["url"],
+ title=f'{result["name"]} - {result["filename"]}',
+ repository=result["repo"],
+ filename=result["filename"],
+ codelines=sorted(lines.items()),
+ strip_whitespace=True,
+ )
)
- # return results
- return results
+ return res
diff --git a/searx/result_types/__init__.py b/searx/result_types/__init__.py
index 6d47d3a4f..f4b37df07 100644
--- a/searx/result_types/__init__.py
+++ b/searx/result_types/__init__.py
@@ -13,25 +13,38 @@
from __future__ import annotations
-__all__ = ["Result", "MainResult", "KeyValue", "EngineResults", "AnswerSet", "Answer", "Translations", "WeatherAnswer"]
-
+__all__ = [
+ "Result",
+ "MainResult",
+ "KeyValue",
+ "EngineResults",
+ "AnswerSet",
+ "Answer",
+ "Translations",
+ "WeatherAnswer",
+ "Code",
+]
+
+import typing as t
import abc
-from searx import enginelib
-
from ._base import Result, MainResult, LegacyResult
from .answer import AnswerSet, Answer, Translations, WeatherAnswer
from .keyvalue import KeyValue
+from .code import Code
-class ResultList(list, abc.ABC):
+class ResultList(list, abc.ABC): # pyright: ignore[reportMissingTypeArgument]
"""Base class of all result lists (abstract)."""
+ @t.final
class types: # pylint: disable=invalid-name
- """The collection of result types (which have already been implemented)."""
+ """The collection of result types (which have already been
+ implemented)."""
Answer = Answer
KeyValue = KeyValue
+ Code = Code
MainResult = MainResult
Result = Result
Translations = Translations
@@ -42,11 +55,11 @@ class ResultList(list, abc.ABC):
def __init__(self):
# pylint: disable=useless-parent-delegation
- super().__init__()
+ super().__init__() # pyright: ignore[reportUnknownMemberType]
def add(self, result: Result | LegacyResult):
"""Add a :py:`Result` item to the result list."""
- self.append(result)
+ self.append(result) # pyright: ignore[reportUnknownMemberType]
class EngineResults(ResultList):
diff --git a/searx/result_types/code.py b/searx/result_types/code.py
new file mode 100644
index 000000000..5350d74f3
--- /dev/null
+++ b/searx/result_types/code.py
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Typification of the *code* results. Results of this type are rendered in
+the :origin:`code.html <searx/templates/simple/result_templates/code.html>`
+template. For highlighting the code passages, Pygments_ is used.
+
+.. _Pygments: https://pygments.org
+
+----
+
+.. autoclass:: Code
+ :members:
+ :show-inheritance:
+
+"""
+# pylint: disable=too-few-public-methods, disable=invalid-name
+
+from __future__ import annotations
+
+__all__ = ["Code"]
+
+import typing as t
+
+from pygments import highlight # pyright: ignore[reportUnknownVariableType]
+from pygments.lexers._mapping import LEXERS # pyright: ignore[reportMissingTypeStubs]
+from pygments.lexers import guess_lexer, get_lexer_by_name, guess_lexer_for_filename
+from pygments.util import ClassNotFound
+from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module
+
+from ._base import MainResult
+
+
+_pygments_languages: list[str] = []
+
+
+def is_valid_language(code_language: str) -> bool:
+ """Checks if the specified ``code_language`` is known in Pygments."""
+ if not _pygments_languages:
+ for l in LEXERS.values():
+ # l[2] is the tuple with the alias names
+ for alias_name in l[2]:
+ _pygments_languages.append(alias_name.lower())
+ return code_language.lower() in _pygments_languages
+
+
+@t.final
+class Code(MainResult, kw_only=True):
+ """Result type for code fragments; the ``codelines`` are rendered as
+ syntax-highlighted HTML by :py:obj:`Code.HTML`."""
+
+ template: str = "code.html"
+
+ repository: str | None = None
+ """URL of a repository related to the *result*."""
+
+ codelines: list[tuple[int, str]] = []
+ """A list of two-item tuples where the first item is the line number and
+ the second item is the code line."""
+
+ hl_lines: set[int] = set()
+ """A set of line numbers to highlight."""
+
+ code_language: str = "<guess>"
+ """Pygments' short name of the lexer, e.g. ``text`` for the
+ :py:obj:`pygments.lexers.special.TextLexer`. For a list of available
+ languages consult: `Pygments languages`_. If the language is not in this
+ list, a :py:obj:`ValueError` is raised.
+
+ The default is ``<guess>`` which has a special meaning:
+
+ - If :py:obj:`Code.filename` is set, Pygments' factory method
+ :py:obj:`pygments.lexers.guess_lexer_for_filename` is used to determine
+ the language of the ``codelines``.
+
+ - else Pygments' :py:obj:`pygments.lexers.guess_lexer` factory is used.
+
+ In case the language can't be detected, the fallback is ``text``.
+
+ .. _Pygments languages: https://pygments.org/languages/
+ """
+
+ filename: str | None = None
+ """Optional file name, can help to ``<guess>`` the language of the code (in
+ case of ambiguous short code examples). If :py:obj:`Code.title` is not set,
+ its default is the filename."""
+
+ strip_new_lines: bool = True
+ """Strip leading and trailing newlines for each returned fragment.
+ Single file might return multiple code fragments.
+ """
+
+ strip_whitespace: bool = False
+ """Strip all leading and trailing whitespace for each returned fragment.
+ Single file might return multiple code fragments. Enabling this might break
+ code indentation.
+ """
+
+ def __post_init__(self):
+ super().__post_init__()
+
+ if not self.title and self.filename:
+ self.title = self.filename
+
+ if self.code_language != "<guess>" and not is_valid_language(self.code_language):
+ raise ValueError(f"unknown code_language: {self.code_language}")
+
+ def __hash__(self):
+ """The hash value is built from the URL and the code lines. :py:obj:`Code
+ <Result.__eq__>` objects are equal, when the hash values of both objects
+ are equal.
+ """
+ return hash(f"{self.url} {self.codelines}")
+
+ def get_lexer(self):
+ if self.code_language != "<guess>":
+ return get_lexer_by_name(self.code_language)
+
+ src_code = "\n".join([l[1] for l in self.codelines])
+ if self.filename:
+ try:
+ return guess_lexer_for_filename(self.filename, src_code)
+ except ClassNotFound:
+ pass
+ try:
+ return guess_lexer(src_code)
+ except ClassNotFound:
+ pass
+ return get_lexer_by_name("text")
+
+ def HTML(self, **options) -> str: # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ """Rendered HTML, additional options are accepted, for more details have
+ a look at HtmlFormatter_.
+
+ .. _HtmlFormatter: https://pygments.org/docs/formatters/#HtmlFormatter
+ """
+ lexer = self.get_lexer()
+
+ line_no: int = 0 # current line number
+ code_block_start: int = 0 # line where the current code block starts
+ code_block_end: int | None = None # line where the current code ends
+ code_block: list[str] = [] # lines of the current code block
+ html_code_blocks: list[str] = [] # HTML representation of all code blocks
+
+ def _render(**kwargs): # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ for k, default in [
+ ("linenos", "inline"),
+ ("linenostart", code_block_start),
+ ("cssclass", "code-highlight"),
+ ("hl_lines", [hl - code_block_start + 1 for hl in self.hl_lines]),
+ ]:
+ kwargs[k] = kwargs.get(k, default) # pyright: ignore[reportUnknownMemberType]
+
+ # Wrap the code inside <pre> blocks using <code>, as recommended by
+ # the HTML5 specification (default is False). Do we need this?
+ kwargs["wrapcode"] = kwargs.get("wrapcode", True)
+
+ html_code_blocks.append(
+ highlight(
+ "\n".join(code_block),
+ lexer,
+ HtmlFormatter(**kwargs), # pyright: ignore[reportUnknownArgumentType]
+ )
+ )
+
+ for line_no, code_line in self.codelines:
+ if code_block_end is None:
+ # initial start condition
+ code_block_start = line_no
+
+ if code_block_end is not None and code_block_end + 1 != line_no:
+ # new code block is detected, render current code block
+ _render(**options) # pyright: ignore[reportUnknownArgumentType]
+ # reset conditions for next code block, whose first line is the
+ # current code line
+ code_block = [code_line]
+ code_block_start = line_no
+ code_block_end = line_no
+ continue
+
+ # add line to the current code block and update last line number
+ code_block.append(code_line)
+ code_block_end = line_no
+
+ # highlight (last) code block
+ _render(**options) # pyright: ignore[reportUnknownArgumentType]
+ return "\n".join(html_code_blocks)
diff --git a/searx/templates/simple/result_templates/code.html b/searx/templates/simple/result_templates/code.html
index bcde94358..6fba99a3e 100644
--- a/searx/templates/simple/result_templates/code.html
+++ b/searx/templates/simple/result_templates/code.html
@@ -10,22 +10,28 @@
{%- endif -%}
{%- if result.repository -%}
<p class="content">{{- '' -}}
- {{ _('repo') }}: {{- ' ' -}}
+ {{ _('Repository') }}: {{- ' ' -}}
<a href="{{ result.repository|safe }}"{{- ' ' -}}
- {% if results_on_new_tab %}
- target="_blank" {{- ' ' -}}
- rel="noopener noreferrer"
- {%- else -%}
- rel="noreferrer"
- {%- endif -%}
- >
- {{- result.repository -}}
+ {% if results_on_new_tab %}
+ target="_blank" {{- ' ' -}}
+ rel="noopener noreferrer"
+ {%- else -%}
+ rel="noreferrer"
+ {%- endif -%}
+ >
+ {{- result.repository -}}
</a>{{- '' -}}
</p>
{%- endif -%}
+{%- if result.filename %}
+ <p class="content">
+ {{ _('Filename') }}: {{ result.filename|safe }}
+ </p>
+{% endif -%}
+
<div dir="ltr" class="codelines">
- {{- result.codelines|code_highlighter(result.code_language, result.hl_lines, result.strip_whitespace, result.strip_new_lines)|safe -}}
+ {{- result.HTML()|safe -}}
</div>
{{- result_sub_footer(result) -}}
diff --git a/tests/unit/test_engine_github_code.py b/tests/unit/test_engine_github_code.py
index d10081f28..13a560713 100644
--- a/tests/unit/test_engine_github_code.py
+++ b/tests/unit/test_engine_github_code.py
@@ -142,29 +142,26 @@ class GithubCodeTests(SearxTestCase):
results = self.ghc.response(response)
expected_results = EngineResults()
expected_results.add(
- expected_results.types.LegacyResult(
- **{
- 'url': "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md",
- 'title': "folke/dot · TODO.md",
- 'content': "☕️ My Dot Files",
- 'repository': "https://github.com/folke/dot",
- 'codelines': [
- (1, "- [x] windows picker"),
- (2, "- [x] toggle cwd / root (LazyVim)"),
- (3, "- [x] dynamic workspace symbol"),
- (4, "- [x] smart stops working after custom"),
- (5, "- [x] edit in empty buffer"),
- (6, "- [x] support toggling line nr for preview"),
- ],
- 'hl_lines': {2, 5, 6},
- 'code_language': "markdown",
- 'template': 'code.html',
- 'strip_whitespace': False,
- 'strip_new_lines': True,
- 'parsed_url': urlparse(
- "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"
- ),
- }
+ expected_results.types.Code(
+ url="https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md",
+ title="folke/dot · TODO.md",
+ content="☕️ My Dot Files",
+ repository="https://github.com/folke/dot",
+ codelines=[
+ (1, "- [x] windows picker"),
+ (2, "- [x] toggle cwd / root (LazyVim)"),
+ (3, "- [x] dynamic workspace symbol"),
+ (4, "- [x] smart stops working after custom"),
+ (5, "- [x] edit in empty buffer"),
+ (6, "- [x] support toggling line nr for preview"),
+ ],
+ hl_lines={2, 5, 6},
+ code_language="markdown",
+ strip_whitespace=False,
+ strip_new_lines=True,
+ parsed_url=urlparse(
+ "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"
+ ),
)
)
self.assertEqual(results, expected_results)