diff options
| author | Markus Heiser <markus.heiser@darmarit.de> | 2022-08-10 18:55:31 +0200 |
|---|---|---|
| committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-08-10 18:55:31 +0200 |
| commit | eb02cc77c56c8e93d305ee8aafc6e80c326e838f (patch) | |
| tree | 02c6a0279aeb686485affbd79fab8ae89fc01911 | |
| parent | b9f16a77dbb9ef2f78719055b33baafef539b6d8 (diff) | |
[fix] google - simplify XPath selectors to fetch more results
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
| -rw-r--r-- | searx/engines/google.py | 15 |
1 files changed, 4 insertions, 11 deletions
```diff
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 14bd26fc3..9cb936ccf 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -112,21 +112,14 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
 # specific xpath variables
 # ------------------------
 
-# google results are grouped into <div class="jtfYYd ..." ../>
-results_xpath = '//div[contains(@class, "jtfYYd")]'
+results_xpath = '//div[contains(@class, "MjjYud")]'
+title_xpath = './/h3[1]'
+href_xpath = './/a/@href'
+content_xpath = './/div[@data-content-feature=1]'
 
 # google *sections* are no usual *results*, we ignore them
 g_section_with_header = './g-section-with-header'
 
-# the title is a h3 tag relative to the result group
-title_xpath = './/h3[1]'
-
-# in the result group there is <div class="yuRUbf" ../> it's first child is a <a
-# href=...>
-href_xpath = './/div[@class="yuRUbf"]//a/@href'
-
-# in the result group there is <div class="VwiC3b ..." ../> containing the *content*
-content_xpath = './/div[contains(@class, "VwiC3b")]'
 
 # Suggestions are links placed in a *card-section*, we extract only the text
 # from the links not the links itself.
```