From 6b57705e50875d9348c855700994395ce8a55b43 Mon Sep 17 00:00:00 2001 From: Filip Mikina Date: Wed, 20 Aug 2025 07:35:31 +0200 Subject: [feat] engines: add GitHub Code Search engine (#5074) This patch adds GitHub Code Search [1] engine to allow querying the codebases. Template code.html is changed to allow passthrough of strip and highlighting options. Engine Searchcode is adjusted to pass filename and not rely on hardcoded extensions. GitHub search code API does not return the exact code line indices, this implementation assigns the code arbitrary numbers starting from 1 (effectively relabeling the code). The API allows for unauth calls, and the default engine settings default to that, although the calls are heavily rate limited. The 'text' lexer is the default pygments lexer when parsing fails. [1] https://docs.github.com/en/rest/search/search?apiVersion=2022-11-28#search-code Co-authored-by: Markus Heiser --- tests/unit/settings/test_github_code.yml | 13 +++ tests/unit/test_engine_github_code.py | 170 +++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 tests/unit/settings/test_github_code.yml create mode 100644 tests/unit/test_engine_github_code.py (limited to 'tests') diff --git a/tests/unit/settings/test_github_code.yml b/tests/unit/settings/test_github_code.yml new file mode 100644 index 000000000..2cf039138 --- /dev/null +++ b/tests/unit/settings/test_github_code.yml @@ -0,0 +1,13 @@ +# This SearXNG setup is used in unit tests + +use_default_settings: + + engines: + keep_only: [] + +engines: + + - name: github code + engine: github_code + shortcut: "ghc" + disabled: true diff --git a/tests/unit/test_engine_github_code.py b/tests/unit/test_engine_github_code.py new file mode 100644 index 000000000..d10081f28 --- /dev/null +++ b/tests/unit/test_engine_github_code.py @@ -0,0 +1,170 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring,disable=missing-class-docstring + +import logging 
from unittest.mock import Mock
from urllib.parse import urlparse
from parameterized import parameterized

import searx.engines
from tests import SearxTestCase
from searx.result_types import EngineResults


# Inputs for ``GithubCodeTests.test_code_extraction``.  Every case is a triple
# ``(code_matches, expected_code, expected_highlighted_lines)``.
#
# NOTE(review): in the first two cases the ``indices`` offsets do not line up
# with the fragment text as written here (e.g. "Buffer" starts at offset 41 of
# the first fragment, not 47; "buffer" starts at offset 59 of the table
# fragment, not 74).  This looks like runs of spaces were collapsed somewhere
# before the text reached this file -- confirm the fragments and the expected
# lines against the authoritative copy of this test.
_EXTRACTION_CASES = [
    # two fragments, each with a match on its second line
    (
        [
            {
                "fragment": " - [Tab management](#tab-management)\n - [Buffer/window management]"
                "(#bufferwindow-management)\n- [🎨 Highlights](#-highlights)",
                "matches": [{"indices": [47, 53], "text": "Buffer"}, {"indices": [74, 80], "text": "buffer"}],
            },
            {
                "fragment": "To conditionally activate plugins, the best solution is to use the\n"
                "[LazyVim VSCode extra](https://www.lazyvim.org/extras/vscode). However, "
                "`packer.nvim` and `lazy.nvim` have built-in\nsupport for "
                "`cond = vim.g.vscode` and `vim-plug` has a",
                "matches": [
                    {"indices": [68, 75], "text": "LazyVim"},
                    {"indices": [102, 109], "text": "lazyvim"},
                ],
            },
        ],
        [
            " - [Tab management](#tab-management)",
            " - [Buffer/window management](#bufferwindow-management)",
            "- [🎨 Highlights](#-highlights)",
            "To conditionally activate plugins, the best solution is to use the",
            "[LazyVim VSCode extra](https://www.lazyvim.org/extras/vscode)."
            " However, `packer.nvim` and `lazy.nvim` have built-in",
            "support for `cond = vim.g.vscode` and `vim-plug` has a",
        ],
        {2, 5},
    ),
    # fragment wrapped in a leading and a trailing newline
    (
        [
            {
                "fragment": "\n| `uf` | Toggle format (global) |\n"
                "| `uF` | Toggle format (buffer) |\n"
                "| `us` | Toggle spelling |\n",
                "matches": [{"indices": [74, 80], "text": "buffer"}],
            },
        ],
        [
            "| `uf` | Toggle format (global) |",
            "| `uF` | Toggle format (buffer) |",
            "| `us` | Toggle spelling |",
        ],
        {2},
    ),
    # several leading newlines in front of the first code line
    (
        [
            {
                "fragment": "\n\n\n1\n2\n3\n4",
                "matches": [{"indices": [3, 4], "text": "1"}],
            },
        ],
        [
            "1",
            "2",
            "3",
            "4",
        ],
        {1},
    ),
    # fragment without any match: nothing gets highlighted
    (
        [
            {
                "fragment": "placeholder",
                "matches": [],
            },
        ],
        [
            "placeholder",
        ],
        set(),
    ),
]


class GithubCodeTests(SearxTestCase):
    """Unit tests of the ``github code`` engine (``extract_code`` and ``response``)."""

    TEST_SETTINGS = "test_github_code.yml"

    def setUp(self):
        # Let the base class prepare the instance, then grab the engine that
        # is registered under the name 'github code'.
        super().setUp()
        engine = searx.engines.engines['github code']
        engine.logger.setLevel(logging.INFO)
        self.ghc = engine

    def tearDown(self):
        # Reload with an empty engine list after each test.
        searx.search.load_engines([])

    @parameterized.expand(_EXTRACTION_CASES)
    def test_code_extraction(self, code_matches, expected_code, expected_highlighted_lines):
        # ``extract_code`` hands back the code lines and the set of line
        # numbers to highlight.
        lines, hl_lines = self.ghc.extract_code(code_matches=code_matches)
        self.assertEqual(lines, expected_code)
        self.assertEqual(hl_lines, expected_highlighted_lines)

    def test_transforms_response(self):
        # One API item with two text matches must become a single result whose
        # code lines are numbered consecutively from 1 across both fragments.
        file_url = "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"

        payload = {
            "items": [
                {
                    "name": "TODO.md",
                    "path": "TODO.md",
                    "html_url": file_url,
                    "repository": {
                        "full_name": "folke/dot",
                        "html_url": "https://github.com/folke/dot",
                        "description": "☕️ My Dot Files",
                    },
                    "text_matches": [
                        {
                            "object_type": "FileContent",
                            "property": "content",
                            "fragment": "- [x] windows picker\n"
                            "- [x] toggle cwd / root (LazyVim)\n"
                            "- [x] dynamic workspace symbol",
                            "matches": [{"indices": [46, 53], "text": "LazyVim"}],
                        },
                        {
                            "object_type": "FileContent",
                            "property": "content",
                            "fragment": "- [x] smart stops working after custom\n"
                            "- [x] edit in empty buffer\n"
                            "- [x] support toggling line nr for preview",
                            "matches": [{"indices": [59, 65], "text": "buffer"}, {"indices": [89, 93], "text": "line"}],
                        },
                    ],
                }
            ]
        }

        # Stand-in for the HTTP response object handed to the engine.
        resp = Mock(status_code=200)
        resp.json.return_value = payload

        got = self.ghc.response(resp)

        want = EngineResults()
        want.add(
            want.types.LegacyResult(
                url=file_url,
                title="folke/dot · TODO.md",
                content="☕️ My Dot Files",
                repository="https://github.com/folke/dot",
                codelines=[
                    (1, "- [x] windows picker"),
                    (2, "- [x] toggle cwd / root (LazyVim)"),
                    (3, "- [x] dynamic workspace symbol"),
                    (4, "- [x] smart stops working after custom"),
                    (5, "- [x] edit in empty buffer"),
                    (6, "- [x] support toggling line nr for preview"),
                ],
                hl_lines={2, 5, 6},
                code_language="markdown",
                template='code.html',
                strip_whitespace=False,
                strip_new_lines=True,
                parsed_url=urlparse(file_url),
            )
        )
        self.assertEqual(got, want)