From 39ff21237cd5290d89aacf91b931ddb1b3e9c405 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sat, 30 May 2015 17:41:40 +0200 Subject: [enh] google engine : avoid some "sorry google" by adding another cookie : NID. This cookie is specific to each hostname. This allows sending requests to google.* (according to the search language). Before this commit, requests in languages other than English were sent to www.google.com, which were then redirected to www.google.* The PREF cookie is still used on the www.google.com domain. --- searx/tests/engines/test_google.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'searx/tests') diff --git a/searx/tests/engines/test_google.py b/searx/tests/engines/test_google.py index 2a90fc5ec..b706e511d 100644 --- a/searx/tests/engines/test_google.py +++ b/searx/tests/engines/test_google.py @@ -8,6 +8,12 @@ from searx.testing import SearxTestCase class TestGoogleEngine(SearxTestCase): + def mock_response(self, text): + response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1') + response.search_params = mock.Mock() + response.search_params.get = mock.Mock(return_value='www.google.com') + return response + def test_request(self): query = 'test_query' dicto = defaultdict(dict) @@ -16,14 +22,17 @@ class TestGoogleEngine(SearxTestCase): params = google.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) - self.assertIn('google.com', params['url']) + self.assertIn('google.fr', params['url']) self.assertNotIn('PREF', params['cookies']) + self.assertIn('NID', params['cookies']) self.assertIn('fr', params['headers']['Accept-Language']) dicto['language'] = 'all' params = google.request(query, dicto) + self.assertIn('google.com', params['url']) self.assertIn('en', params['headers']['Accept-Language']) self.assertIn('PREF', params['cookies']) + self.assertIn('NID', params['cookies']) def test_response(self): self.assertRaises(AttributeError, 
google.response, None) @@ -31,7 +40,7 @@ class TestGoogleEngine(SearxTestCase): self.assertRaises(AttributeError, google.response, '') self.assertRaises(AttributeError, google.response, '[]') - response = mock.Mock(text='') + response = self.mock_response('') self.assertEqual(google.response(response), []) html = """ @@ -124,7 +133,7 @@ class TestGoogleEngine(SearxTestCase):

""" - response = mock.Mock(text=html) + response = self.mock_response(html) results = google.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -137,11 +146,21 @@ class TestGoogleEngine(SearxTestCase):
  • """ - response = mock.Mock(text=html) + response = self.mock_response(html) results = google.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) + response = mock.Mock(text='', url='https://sorry.google.com') + response.search_params = mock.Mock() + response.search_params.get = mock.Mock(return_value='www.google.com') + self.assertRaises(RuntimeWarning, google.response, response) + + response = mock.Mock(text='', url='https://www.google.com/sorry/IndexRedirect') + response.search_params = mock.Mock() + response.search_params.get = mock.Mock(return_value='www.google.com') + self.assertRaises(RuntimeWarning, google.response, response) + def test_parse_images(self): html = """
  • @@ -154,7 +173,7 @@ class TestGoogleEngine(SearxTestCase):
  • """ dom = lxml.html.fromstring(html) - results = google.parse_images(dom) + results = google.parse_images(dom, 'www.google.com') self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['url'], 'http://this.is.the.url/') -- cgit v1.2.3