From 5276219b9d790baeeb505813bb76d0dffa1d2d51 Mon Sep 17 00:00:00 2001 From: Grant Lanham Date: Mon, 19 Aug 2024 23:02:06 -0400 Subject: Fix tineye engine url, datetime parsing, and minor refactor Changes made to tineye engine: 1. Importing logging if TYPE_CHECKING is enabled 2. Remove unnecessary try-catch around json parsing the response, as this masked the original error and had no immediate benefit 3. Improve error handling explicitly for status code 422 and 400 upfront, deferring json_parsing only for these status codes and successful status codes 4. Unit test all new applicable changes to ensure compatibility --- tests/unit/test_tineye.py | 102 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tests/unit/test_tineye.py (limited to 'tests') diff --git a/tests/unit/test_tineye.py b/tests/unit/test_tineye.py new file mode 100644 index 000000000..0530b4c5e --- /dev/null +++ b/tests/unit/test_tineye.py @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring + + +from datetime import datetime +from unittest.mock import Mock +from requests import HTTPError +from searx.engines import load_engines, tineye +from tests import SearxTestCase + + +class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring + + def setUp(self): + load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]) + + def tearDown(self): + load_engines([]) + + def test_status_code_raises(self): + response = Mock() + response.status_code = 401 + response.raise_for_status.side_effect = HTTPError() + self.assertRaises(HTTPError, lambda: tineye.response(response)) + + def test_returns_empty_list_for_422(self): + response = Mock() + response.json.return_value = {} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + with self.assertLogs(tineye.logger) as _dev_null: + results = tineye.response(response) + 
self.assertEqual(0, len(results)) + + def test_logs_format_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "Invalid image URL"}} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output)) + + def test_logs_signature_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "NO_SIGNATURE_ERROR"}} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output)) + + def test_logs_download_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "Download Error"}} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output)) + + def test_empty_list_for_400(self): + response = Mock() + response.json.return_value = {} + response.status_code = 400 + response.raise_for_status.side_effect = HTTPError() + with self.assertLogs(tineye.logger) as _dev_null: + results = tineye.response(response) + self.assertEqual(0, len(results)) + + def test_logs_description_for_400(self): + description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645' + response = Mock() + response.json.return_value = {"suggestions": {"description": [description], "title": "Oops! 
We're sorry!"}} + response.status_code = 400 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(description, ','.join(assert_logs_context.output)) + + def test_crawl_date_parses(self): + date_str = '2020-05-25' + date = datetime.strptime(date_str, '%Y-%m-%d') + response = Mock() + response.json.return_value = { + 'matches': [ + { + 'backlinks': [ + { + 'crawl_date': date_str, + } + ] + } + ] + } + response.status_code = 200 + results = tineye.response(response) + self.assertEqual(date, results[0]['publishedDate']) -- cgit v1.2.3