From 6b57705e50875d9348c855700994395ce8a55b43 Mon Sep 17 00:00:00 2001 From: Filip Mikina Date: Wed, 20 Aug 2025 07:35:31 +0200 Subject: [feat] engines: add GitHub Code Search engine (#5074) This patch adds the GitHub Code Search [1] engine to allow querying codebases. The code.html template is changed to allow passthrough of strip and highlighting options. The Searchcode engine is adjusted to pass the filename and not rely on hardcoded extensions. The GitHub code search API does not return the exact code line indices, so this implementation assigns the code arbitrary line numbers starting from 1 (effectively relabeling the code). The API allows unauthenticated calls, and the default engine settings use them, although such calls are heavily rate limited. The 'text' lexer is the fallback pygments lexer when the lexer lookup fails. [1] https://docs.github.com/en/rest/search/search?apiVersion=2022-11-28#search-code Co-authored-by: Markus Heiser --- searx/webapp.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'searx/webapp.py') diff --git a/searx/webapp.py b/searx/webapp.py index 2dd7ddb08..9b590eeab 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -181,24 +181,32 @@ def _get_locale_rfc5646(locale): # code-highlighter @app.template_filter('code_highlighter') -def code_highlighter(codelines, language=None): +def code_highlighter(codelines, language=None, hl_lines=None, strip_whitespace=True, strip_new_lines=True): if not language: language = 'text' try: - # find lexer by programming language - lexer = get_lexer_by_name(language, stripall=True) + lexer = get_lexer_by_name(language, stripall=strip_whitespace, stripnl=strip_new_lines) except Exception as e: # pylint: disable=broad-except logger.warning("pygments lexer: %s " % e) # if lexer is not found, using default one - lexer = get_lexer_by_name('text', stripall=True) + lexer = get_lexer_by_name('text', stripall=strip_whitespace, stripnl=strip_new_lines) html_code = '' tmp_code = '' last_line = 
None line_code_start = None + def offset_hl_lines(hl_lines, start): + """ + hl_lines in pygments are expected to be relative to the input + """ + if hl_lines is None: + return None + + return [line - start + 1 for line in hl_lines] + # parse lines for line, code in codelines: if not last_line: @@ -208,7 +216,12 @@ def code_highlighter(codelines, language=None): if last_line is not None and last_line + 1 != line: # highlight last codepart - formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight") + formatter = HtmlFormatter( + linenos='inline', + linenostart=line_code_start, + cssclass="code-highlight", + hl_lines=offset_hl_lines(hl_lines, line_code_start), + ) html_code = html_code + highlight(tmp_code, lexer, formatter) # reset conditions for next codepart @@ -222,7 +235,12 @@ def code_highlighter(codelines, language=None): last_line = line # highlight last codepart - formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight") + formatter = HtmlFormatter( + linenos='inline', + linenostart=line_code_start, + cssclass="code-highlight", + hl_lines=offset_hl_lines(hl_lines, line_code_start), + ) html_code = html_code + highlight(tmp_code, lexer, formatter) return html_code -- cgit v1.2.3