summary refs log tree commit diff
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarIT.de> 2021-11-27 12:59:39 +0100
committer: GitHub <noreply@github.com> 2021-11-27 12:59:39 +0100
commit: 11ba7f294ff1db37822dd5e7ec75cab5b30deccd (patch)
tree: b690dce6ca45de6dc353ff010b36436453efb58b
parent: f0db33e14b5b443eedc25f5aeba6cd330476c851 (diff)
parent: 6e06618e0c44b52d5d322a7f5d35f1a8a7a7d247 (diff)
Merge pull request #554 from return42/fix-google-video
[fix] google-videos engine: ignore news articles
-rw-r--r-- searx/engines/google_videos.py 12
1 files changed, 5 insertions, 7 deletions
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index abf046f4c..77b0ab260 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -154,25 +154,23 @@ def response(resp):
# parse results
for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'):
- # google *sections*
+ # ignore google *sections*
if extract_text(eval_xpath(result, g_section_with_header)):
logger.debug("ingoring <g-section-with-header>")
continue
- title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
- url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
-
- # <img id="vidthumb1" ...>
+ # ingnore articles without an image id / e.g. news articles
img_id = eval_xpath_getindex(result, './/g-img/img/@id', 0, default=None)
if img_id is None:
- logger.error("no img_id for: %s" % result)
+ logger.error("no img_id found in item %s (news article?)", len(results) + 1)
continue
img_src = vidthumb_imgdata.get(img_id, None)
if not img_src:
- logger.error("no vidthumb imgdata for: %s" % img_id)
img_src = thumbs_src.get(img_id, "")
+ title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
+ url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
length = extract_text(eval_xpath(
result, './/div[contains(@class, "P7xzyf")]/span/span'))
c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)