diff options
| -rw-r--r-- | searx/engines/spotify.py | 60 | ||||
| -rw-r--r-- | searx/poolrequests.py | 53 | ||||
| -rw-r--r-- | searx/settings.yml | 21 | ||||
| -rw-r--r-- | searx/tests/engines/test_blekko_images.py | 11 | ||||
| -rw-r--r-- | searx/tests/engines/test_google_images.py | 11 | ||||
| -rw-r--r-- | searx/tests/engines/test_spotify.py | 124 | ||||
| -rw-r--r-- | searx/tests/engines/test_yahoo.py | 14 | ||||
| -rw-r--r-- | searx/tests/test_engines.py | 1 | ||||
| -rw-r--r-- | versions.cfg | 2 |
9 files changed, 262 insertions, 35 deletions
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py new file mode 100644 index 000000000..61f3721ec --- /dev/null +++ b/searx/engines/spotify.py @@ -0,0 +1,60 @@ +## Spotify (Music) +# +# @website https://spotify.com +# @provide-api yes (https://developer.spotify.com/web-api/search-item/) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title, content, embedded + +from json import loads +from urllib import urlencode + +# engine dependent config +categories = ['music'] +paging = True + +# search-url +url = 'https://api.spotify.com/' +search_url = url + 'v1/search?{query}&type=track&offset={offset}' + +embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{audioid}"\ + width="300" height="80" frameborder="0" allowtransparency="true"></iframe>' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 20 + + params['url'] = search_url.format(query=urlencode({'q': query}), + offset=offset) + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_res = loads(resp.text) + + # parse results + for result in search_res.get('tracks', {}).get('items', {}): + if result['type'] == 'track': + title = result['name'] + url = result['external_urls']['spotify'] + content = result['artists'][0]['name'] +\ + " • " +\ + result['album']['name'] +\ + " • " + result['name'] + embedded = embedded_url.format(audioid=result['id']) + + # append result + results.append({'url': url, + 'title': title, + 'embedded': embedded, + 'content': content}) + + # return results + return results diff --git a/searx/poolrequests.py b/searx/poolrequests.py index 65853c2e9..b74d43a02 100644 --- a/searx/poolrequests.py +++ b/searx/poolrequests.py @@ -1,20 +1,63 @@ import requests +from itertools import cycle +from searx import settings -the_http_adapter = requests.adapters.HTTPAdapter(pool_connections=100) -the_https_adapter = 
requests.adapters.HTTPAdapter(pool_connections=100) +class HTTPAdapterWithConnParams(requests.adapters.HTTPAdapter): + + def __init__(self, pool_connections=requests.adapters.DEFAULT_POOLSIZE, + pool_maxsize=requests.adapters.DEFAULT_POOLSIZE, + max_retries=requests.adapters.DEFAULT_RETRIES, + pool_block=requests.adapters.DEFAULT_POOLBLOCK, + **conn_params): + if max_retries == requests.adapters.DEFAULT_RETRIES: + self.max_retries = requests.adapters.Retry(0, read=False) + else: + self.max_retries = requests.adapters.Retry.from_int(max_retries) + self.config = {} + self.proxy_manager = {} + + super(requests.adapters.HTTPAdapter, self).__init__() + + self._pool_connections = pool_connections + self._pool_maxsize = pool_maxsize + self._pool_block = pool_block + self._conn_params = conn_params + + self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block, **conn_params) + + def __setstate__(self, state): + # Can't handle by adding 'proxy_manager' to self.__attrs__ because + # self.poolmanager uses a lambda function, which isn't pickleable.
+ self.proxy_manager = {} + self.config = {} + + for attr, value in state.items(): + setattr(self, attr, value) + + self.init_poolmanager(self._pool_connections, self._pool_maxsize, + block=self._pool_block, **self._conn_params) + + +if settings.get('source_ips'): + http_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=100, source_address=(source_ip, 0)) + for source_ip in settings['source_ips']) + https_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=100, source_address=(source_ip, 0)) + for source_ip in settings['source_ips']) +else: + http_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=100), )) + https_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=100), )) class SessionSinglePool(requests.Session): def __init__(self): - global the_https_adapter, the_http_adapter super(SessionSinglePool, self).__init__() # reuse the same adapters self.adapters.clear() - self.mount('https://', the_https_adapter) - self.mount('http://', the_http_adapter) + self.mount('https://', next(https_adapters)) + self.mount('http://', next(http_adapters)) def close(self): """Call super, but clear adapters since there are managed globaly""" diff --git a/searx/settings.yml b/searx/settings.yml index 8e2833ef0..b2689bd13 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -11,6 +11,12 @@ server: image_proxy : False # Proxying image results through searx default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section +# uncomment below section only if you have more than one network interface +# which can be the source of outgoing search requests +#source_ips: +# - 1.1.1.1 +# - 1.1.1.2 + engines: - name : wikipedia engine : mediawiki @@ -78,12 +84,6 @@ engines: # shortcut : fa # api_key : 'apikey' # required! 
-# down - website is under criminal investigation by the UK -# - name : filecrop -# engine : filecrop -# categories : files -# shortcut : fc - - name : 500px engine : www500px shortcut : px @@ -103,11 +103,6 @@ engines: # Or you can use the html non-stable engine, activated by default engine : flickr_noapi - - name : general-file - engine : generalfile - shortcut : gf - disabled : True - - name : gigablast engine : gigablast shortcut : gb @@ -195,6 +190,10 @@ engines: shortcut : scc disabled : True + - name : spotify + engine : spotify + shortcut : stf + - name : subtitleseeker engine : subtitleseeker shortcut : ss diff --git a/searx/tests/engines/test_blekko_images.py b/searx/tests/engines/test_blekko_images.py index 793fadbad..beb0853e3 100644 --- a/searx/tests/engines/test_blekko_images.py +++ b/searx/tests/engines/test_blekko_images.py @@ -12,9 +12,14 @@ class TestBlekkoImagesEngine(SearxTestCase): dicto['pageno'] = 0 dicto['safesearch'] = 1 params = blekko_images.request(query, dicto) - self.assertTrue('url' in params) - self.assertTrue(query in params['url']) - self.assertTrue('blekko.com' in params['url']) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('blekko.com', params['url']) + self.assertIn('page', params['url']) + + dicto['pageno'] = 1 + params = blekko_images.request(query, dicto) + self.assertNotIn('page', params['url']) def test_response(self): self.assertRaises(AttributeError, blekko_images.response, None) diff --git a/searx/tests/engines/test_google_images.py b/searx/tests/engines/test_google_images.py index 6870ff52f..32d133334 100644 --- a/searx/tests/engines/test_google_images.py +++ b/searx/tests/engines/test_google_images.py @@ -11,9 +11,14 @@ class TestGoogleImagesEngine(SearxTestCase): dicto = defaultdict(dict) dicto['pageno'] = 1 params = google_images.request(query, dicto) - self.assertTrue('url' in params) - self.assertTrue(query in params['url']) - self.assertTrue('googleapis.com' in params['url']) 
+ self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('googleapis.com', params['url']) + self.assertIn('safe=on', params['url']) + + dicto['safesearch'] = 0 + params = google_images.request(query, dicto) + self.assertIn('safe=off', params['url']) def test_response(self): self.assertRaises(AttributeError, google_images.response, None) diff --git a/searx/tests/engines/test_spotify.py b/searx/tests/engines/test_spotify.py new file mode 100644 index 000000000..fd274abbd --- /dev/null +++ b/searx/tests/engines/test_spotify.py @@ -0,0 +1,124 @@ +from collections import defaultdict +import mock +from searx.engines import spotify +from searx.testing import SearxTestCase + + +class TestSpotifyEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = spotify.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('spotify.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, spotify.response, None) + self.assertRaises(AttributeError, spotify.response, []) + self.assertRaises(AttributeError, spotify.response, '') + self.assertRaises(AttributeError, spotify.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(spotify.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(spotify.response(response), []) + + json = """ + { + "tracks": { + "href": "https://api.spotify.com/v1/search?query=nosfell&offset=0&limit=20&type=track", + "items": [ + { + "album": { + "album_type": "album", + "external_urls": { + "spotify": "https://open.spotify.com/album/5c9ap1PBkSGLxT3J73toxA" + }, + "href": "https://api.spotify.com/v1/albums/5c9ap1PBkSGLxT3J73toxA", + "id": "5c9ap1PBkSGLxT3J73toxA", + "name": "Album Title", + "type": "album", + "uri": "spotify:album:5c9ap1PBkSGLxT3J73toxA" + }, + "artists": [ + { + "external_urls": { + "spotify": 
"https://open.spotify.com/artist/0bMc6b75FfZEpQHG1jifKu" + }, + "href": "https://api.spotify.com/v1/artists/0bMc6b75FfZEpQHG1jifKu", + "id": "0bMc6b75FfZEpQHG1jifKu", + "name": "Artist Name", + "type": "artist", + "uri": "spotify:artist:0bMc6b75FfZEpQHG1jifKu" + } + ], + "disc_number": 1, + "duration_ms": 202386, + "explicit": false, + "external_ids": { + "isrc": "FRV640600067" + }, + "external_urls": { + "spotify": "https://open.spotify.com/track/2GzvFiedqW8hgqUpWcASZa" + }, + "href": "https://api.spotify.com/v1/tracks/2GzvFiedqW8hgqUpWcASZa", + "id": "1000", + "is_playable": true, + "name": "Title of track", + "popularity": 6, + "preview_url": "https://p.scdn.co/mp3-preview/7b8ecda580965a066b768c2647f877e43f7b1a0a", + "track_number": 3, + "type": "track", + "uri": "spotify:track:2GzvFiedqW8hgqUpWcASZa" + } + ], + "limit": 20, + "next": "https://api.spotify.com/v1/search?query=nosfell&offset=20&limit=20&type=track", + "offset": 0, + "previous": null, + "total": 107 + } + } + """ + response = mock.Mock(text=json) + results = spotify.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title of track') + self.assertEqual(results[0]['url'], 'https://open.spotify.com/track/2GzvFiedqW8hgqUpWcASZa') + self.assertEqual(results[0]['content'], 'Artist Name • Album Title • Title of track') + self.assertIn('1000', results[0]['embedded']) + + json = """ + { + "tracks": { + "href": "https://api.spotify.com/v1/search?query=nosfell&offset=0&limit=20&type=track", + "items": [ + { + "href": "https://api.spotify.com/v1/tracks/2GzvFiedqW8hgqUpWcASZa", + "id": "1000", + "is_playable": true, + "name": "Title of track", + "popularity": 6, + "preview_url": "https://p.scdn.co/mp3-preview/7b8ecda580965a066b768c2647f877e43f7b1a0a", + "track_number": 3, + "type": "album", + "uri": "spotify:track:2GzvFiedqW8hgqUpWcASZa" + } + ], + "limit": 20, + "next": 
"https://api.spotify.com/v1/search?query=nosfell&offset=20&limit=20&type=track", + "offset": 0, + "previous": null, + "total": 107 + } + } + """ + response = mock.Mock(text=json) + results = spotify.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_yahoo.py b/searx/tests/engines/test_yahoo.py index cdd6fda29..11ef9db22 100644 --- a/searx/tests/engines/test_yahoo.py +++ b/searx/tests/engines/test_yahoo.py @@ -75,12 +75,6 @@ class TestYahooEngine(SearxTestCase): <li> <div class="dd algo lst Sr"> <div class="compTitle"> - <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA; - _ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10 - /RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-" - target="_blank" data-bid="54e712e136926"> - This is the second <b><b>title</b></b></a> - </h3> </div> <div class="compText aAbs"> <p class="lh-18">This is the second content</p> @@ -102,16 +96,12 @@ class TestYahooEngine(SearxTestCase): """ response = mock.Mock(text=html) results = yahoo.response(response) - print results self.assertEqual(type(results), list) - self.assertEqual(len(results), 3) + self.assertEqual(len(results), 2) self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['url'], 'https://this.is.the.url/') self.assertEqual(results[0]['content'], 'This is the content') - self.assertEqual(results[1]['title'], 'This is the second title') - self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/') - self.assertEqual(results[1]['content'], 'This is the second content') - self.assertEqual(results[2]['suggestion'], 'This is the suggestion') + self.assertEqual(results[1]['suggestion'], 'This is the suggestion') html = """ <ol class="reg mb-15 searchCenterMiddle"> diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 
9b1c12cb1..5770458f3 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -28,6 +28,7 @@ from searx.tests.engines.test_piratebay import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa +from searx.tests.engines.test_spotify import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa from searx.tests.engines.test_startpage import * # noqa from searx.tests.engines.test_subtitleseeker import * # noqa diff --git a/versions.cfg b/versions.cfg index 7f1734908..ef8082281 100644 --- a/versions.cfg +++ b/versions.cfg @@ -22,7 +22,7 @@ plone.testing = 4.0.8 pyflakes = 0.7.3 pytz = 2013b pyyaml = 3.10 -requests = 2.2.0 +requests = 2.5.3 robotframework-debuglibrary = 0.3 robotframework-httplibrary = 0.4.2 robotframework-selenium2library = 1.5.0 |