diff options
| -rw-r--r-- | Dockerfile | 25 | ||||
| -rw-r--r-- | searx/engines/bing_images.py | 2 | ||||
| -rw-r--r-- | searx/engines/currency_convert.py | 6 | ||||
| -rw-r--r-- | searx/engines/google.py | 147 | ||||
| -rw-r--r-- | searx/engines/vimeo.py | 12 | ||||
| -rw-r--r-- | searx/engines/www1x.py | 4 | ||||
| -rw-r--r-- | searx/plugins/__init__.py | 4 | ||||
| -rw-r--r-- | searx/plugins/self_info.py (renamed from searx/plugins/self_ip.py) | 13 | ||||
| -rw-r--r-- | searx/settings.yml | 2 | ||||
| -rw-r--r-- | searx/tests/engines/test_bing_images.py | 2 | ||||
| -rw-r--r-- | searx/tests/engines/test_swisscows.py | 4 | ||||
| -rw-r--r-- | searx/tests/engines/test_vimeo.py | 53 | ||||
| -rw-r--r-- | searx/tests/engines/test_www1x.py | 4 | ||||
| -rw-r--r-- | searx/tests/engines/test_yahoo_news.py | 19 | ||||
| -rw-r--r-- | searx/tests/engines/test_youtube_noapi.py | 51 | ||||
| -rw-r--r-- | searx/tests/test_plugins.py | 19 | ||||
| -rw-r--r-- | searx/tests/test_search.py | 25 | ||||
| -rw-r--r-- | searx/webapp.py | 41 |
18 files changed, 363 insertions, 70 deletions
diff --git a/Dockerfile b/Dockerfile index 831a429e2..543c74d0e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,22 @@ -FROM debian:stable +FROM python:2.7-slim + +WORKDIR /app + +RUN useradd searx + +EXPOSE 5000 +CMD ["/usr/local/bin/uwsgi", "--uid", "searx", "--gid", "searx", "--http", ":5000", "-w", "searx.webapp"] RUN apt-get update && \ apt-get install -y --no-install-recommends \ - python-dev python2.7-minimal python-virtualenv \ - python-pybabel python-pip zlib1g-dev \ - libxml2-dev libxslt1-dev build-essential \ - openssl + zlib1g-dev libxml2-dev libxslt1-dev libffi-dev build-essential \ + libssl-dev openssl && \ + rm -rf /var/lib/apt/lists/* -RUN useradd searx +RUN pip install --no-cache uwsgi -WORKDIR /app -RUN pip install uwsgi COPY requirements.txt /app/requirements.txt -RUN pip install -r requirements.txt +RUN pip install --no-cache -r requirements.txt COPY . /app RUN sed -i -e "s/ultrasecretkey/`openssl rand -hex 16`/g" searx/settings.yml - -EXPOSE 5000 -CMD ["/usr/local/bin/uwsgi", "--uid", "searx", "--gid", "searx", "--http", ":5000", "-w", "searx.webapp"] diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index b06a57edc..839b8e5be 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -28,7 +28,7 @@ safesearch = True # search-url base_url = 'https://www.bing.com/' search_string = 'images/search?{query}&count=10&first={offset}' -thumb_url = "http://ts1.mm.bing.net/th?id={ihk}" # no https, bad certificate +thumb_url = "https://www.bing.com/th?id={ihk}" # safesearch definitions safesearch_types = {2: 'STRICT', diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 1ba4575c5..26830a167 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -9,7 +9,7 @@ categories = [] url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' weight = 100 -parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([^.0-9].+)\W*in?\W*([^\.]+)\W*$', re.I) # noqa +parser_re = re.compile(u'^\W*(\d+(?:\.\d+)?)\W*([^.0-9].+)\W+in?\W+([^\.]+)\W*$', re.I) # noqa db = 1 @@ -17,7 +17,7 @@ db = 1 def normalize_name(name): name = name.lower().replace('-', ' ') name = re.sub(' +', ' ', name) - return unicodedata.normalize('NFKD', u"" + name).lower() + return unicodedata.normalize('NFKD', name).lower() def name_to_iso4217(name): @@ -35,7 +35,7 @@ def iso4217_to_name(iso4217, language): def request(query, params): - m = parser_re.match(query) + m = parser_re.match(unicode(query, 'utf8')) if not m: # wrong query return params diff --git a/searx/engines/google.py b/searx/engines/google.py index 785cd5e66..0e78a9e2c 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -8,6 +8,7 @@ # @stable no (HTML can change) # @parse url, title, content, suggestion +import re from urllib import urlencode from urlparse import urlparse, parse_qsl from lxml import html @@ -78,15 +79,22 @@ country_to_hostname = { 'TW': 'www.google.com.tw' # Taiwan } +# osm +url_map = 'https://www.openstreetmap.org/'\ + + '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M' + # search-url search_path = '/search' -maps_path = '/maps/' -redirect_path = '/url' -images_path = '/images' search_url = ('https://{hostname}' + search_path + '?{query}&start={offset}&gbv=1') +# other URLs +map_hostname_start = 'maps.google.' +maps_path = '/maps' +redirect_path = '/url' +images_path = '/images' + # specific xpath variables results_xpath = '//li[@class="g"]' url_xpath = './/h3/a/@href' @@ -95,10 +103,29 @@ content_xpath = './/span[@class="st"]' content_misc_xpath = './/div[@class="f slp"]' suggestion_xpath = '//p[@class="_Bmc"]' +# map : detail location +map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()' +map_phone_xpath = './/div[@class="s"]//table//td[2]/span/span' +map_website_url_xpath = 'h3[2]/a/@href' +map_website_title_xpath = 'h3[2]' + +# map : near the location +map_near = 'table[@class="ts"]//tr' +map_near_title = './/h4' +map_near_url = './/h4/a/@href' +map_near_phone = './/span[@class="nobr"]' + +# images images_xpath = './/div/a' image_url_xpath = './@href' image_img_src_xpath = './img/@src' +# property names +# FIXME : no translation +property_address = "Address" +property_phone = "Phone number" + +# cookies pref_cookie = '' nid_cookie = {} @@ -122,6 +149,11 @@ def get_google_nid_cookie(google_hostname): # remove google-specific tracking-url def parse_url(url_string, google_hostname): + # sanity check + if url_string is None: + return url_string + + # normal case parsed_url = urlparse(url_string) if (parsed_url.netloc in [google_hostname, ''] and parsed_url.path == redirect_path): @@ -151,7 +183,7 @@ def request(query, params): if len(language_array) == 2: country = language_array[1] else: - country = ' ' + country = 'US' language = language_array[0] + ',' + language_array[0] + '-' + country if use_locale_domain: @@ -196,21 +228,32 @@ def response(resp): try: url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname) parsed_url = urlparse(url, google_hostname) - if (parsed_url.netloc == google_hostname - and (parsed_url.path == search_path - or parsed_url.path.startswith(maps_path))): - # remove the link to google news and google maps - # FIXME : sometimes the URL is https://maps.google.*/maps - # no consequence, the result trigger an exception after which is ignored - continue + + # map result + if ((parsed_url.netloc == google_hostname and parsed_url.path.startswith(maps_path)) + or (parsed_url.netloc.startswith(map_hostname_start))): + x = result.xpath(map_near) + if len(x) > 0: + # map : near the location + results = results + parse_map_near(parsed_url, x, google_hostname) + else: + # map : detail about a location + results = results + parse_map_detail(parsed_url, result, google_hostname) + + # google news + elif (parsed_url.netloc == google_hostname + and parsed_url.path == search_path): + # skipping news results + pass # images result - if (parsed_url.netloc == google_hostname - and parsed_url.path == images_path): + elif (parsed_url.netloc == google_hostname + and parsed_url.path == images_path): # only thumbnail image provided, # so skipping image results # results = results + parse_images(result, google_hostname) pass + else: # normal result content = extract_text_from_dom(result, content_xpath) @@ -222,8 +265,9 @@ def response(resp): # append result results.append({'url': url, 'title': title, - 'content': content}) - except Exception: + 'content': content + }) + except: continue # parse suggestion @@ -246,6 +290,77 @@ def parse_images(result, google_hostname): 'title': '', 'content': '', 'img_src': img_src, - 'template': 'images.html'}) + 'template': 'images.html' + }) + + return results + + +def parse_map_near(parsed_url, x, google_hostname): + results = [] + + for result in x: + title = extract_text_from_dom(result, map_near_title) + url = parse_url(extract_text_from_dom(result, map_near_url), google_hostname) + attributes = [] + phone = extract_text_from_dom(result, map_near_phone) + add_attributes(attributes, property_phone, phone, 'tel:' + phone) + results.append({'title': title, + 'url': url, + 'content': attributes_to_html(attributes) + }) return results + + +def parse_map_detail(parsed_url, result, google_hostname): + results = [] + + # try to parse the geoloc + m = re.search('@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path) + if m is None: + m = re.search('ll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query) + + if m is not None: + # geoloc found (ignored) + lon = float(m.group(2)) # noqa + lat = float(m.group(1)) # noqa + zoom = int(m.group(3)) # noqa + + # attributes + attributes = [] + address = extract_text_from_dom(result, map_address_xpath) + phone = extract_text_from_dom(result, map_phone_xpath) + add_attributes(attributes, property_address, address, 'geo:' + str(lat) + ',' + str(lon)) + add_attributes(attributes, property_phone, phone, 'tel:' + phone) + + # title / content / url + website_title = extract_text_from_dom(result, map_website_title_xpath) + content = extract_text_from_dom(result, content_xpath) + website_url = parse_url(extract_text_from_dom(result, map_website_url_xpath), google_hostname) + + # add a result if there is a website + if website_url is not None: + results.append({'title': website_title, + 'content': (content + '<br />' if content is not None else '') + + attributes_to_html(attributes), + 'url': website_url + }) + + return results + + +def add_attributes(attributes, name, value, url): + if value is not None and len(value) > 0: + attributes.append({'label': name, 'value': value, 'url': url}) + + +def attributes_to_html(attributes): + retval = '<table class="table table-striped">' + for a in attributes: + value = a.get('value') + if 'url' in a: + value = '<a href="' + a.get('url') + '">' + value + '</a>' + retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>' + retval = retval + '</table>' + return retval diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index 0dcc65b7c..517ac1c44 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -27,11 +27,11 @@ base_url = 'https://vimeo.com' search_url = base_url + '/search/page:{pageno}?{query}' # specific xpath variables -results_xpath = '//div[@id="browse_content"]/ol/li' -url_xpath = './a/@href' -title_xpath = './a/div[@class="data"]/p[@class="title"]' -content_xpath = './a/img/@src' -publishedDate_xpath = './/p[@class="meta"]//attribute::datetime' +results_xpath = '//div[contains(@class,"results_grid")]/ul/li' +url_xpath = './/a/@href' +title_xpath = './/span[@class="title"]' +thumbnail_xpath = './/img[@class="js-clip_thumbnail_image"]/@src' +publishedDate_xpath = './/time/attribute::datetime' embedded_url = '<iframe data-src="//player.vimeo.com/video{videoid}" ' +\ 'width="540" height="304" frameborder="0" ' +\ @@ -58,7 +58,7 @@ def response(resp): videoid = result.xpath(url_xpath)[0] url = base_url + videoid title = p.unescape(extract_text(result.xpath(title_xpath))) - thumbnail = extract_text(result.xpath(content_xpath)[0]) + thumbnail = extract_text(result.xpath(thumbnail_xpath)[0]) publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0])) embedded = embedded_url.format(videoid=videoid) diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index 12868ad22..ddb79bfea 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -20,8 +20,8 @@ import re categories = ['images'] paging = False -# search-url, no HTTPS (there is a valid certificate for https://api2.1x.com/ ) -base_url = 'http://1x.com' +# search-url +base_url = 'https://1x.com' search_url = base_url+'/backend/search.php?{query}' diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index d61eb6073..a4d7ad8a8 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -20,7 +20,7 @@ from searx import logger logger = logger.getChild('plugins') from searx.plugins import (https_rewrite, - self_ip, + self_info, search_on_category_select, tracker_url_remover) @@ -72,6 +72,6 @@ class PluginStore(): plugins = PluginStore() plugins.register(https_rewrite) -plugins.register(self_ip) +plugins.register(self_info) plugins.register(search_on_category_select) plugins.register(tracker_url_remover) diff --git a/searx/plugins/self_ip.py b/searx/plugins/self_info.py index 5184ea4cf..5ca994526 100644 --- a/searx/plugins/self_ip.py +++ b/searx/plugins/self_info.py @@ -15,11 +15,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, <asciimoo@gmail.com> ''' from flask.ext.babel import gettext -name = "Self IP" -description = gettext('Display your source IP address if the query expression is "ip"') +import re +name = "Self Informations" +description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".') default_on = True +# Self User Agent regex +p = re.compile('.*user[ -]agent.*', re.IGNORECASE) + + # attach callback to the post search hook # request: flask request object # ctx: the whole local context of the pre search hook @@ -32,4 +37,8 @@ def post_search(request, ctx): ip = request.remote_addr ctx['search'].answers.clear() ctx['search'].answers.add(ip) + elif p.match(ctx['search'].query): + ua = request.user_agent + ctx['search'].answers.clear() + ctx['search'].answers.add(ua) return True diff --git a/searx/settings.yml b/searx/settings.yml index 7e1a16ab8..021068eb2 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -172,6 +172,7 @@ engines: engine : qwant shortcut : qw categories : general + disabled : True - name : qwant images engine : qwant @@ -236,6 +237,7 @@ engines: - name : swisscows engine : swisscows shortcut : sw + disabled : True - name : twitter engine : twitter diff --git a/searx/tests/engines/test_bing_images.py b/searx/tests/engines/test_bing_images.py index a1d96b06e..f869da79d 100644 --- a/searx/tests/engines/test_bing_images.py +++ b/searx/tests/engines/test_bing_images.py @@ -59,7 +59,7 @@ oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%2 self.assertEqual(results[0]['title'], 'Test Query') self.assertEqual(results[0]['url'], 'http://www.page.url/') self.assertEqual(results[0]['content'], '') - self.assertEqual(results[0]['thumbnail_src'], 'http://ts1.mm.bing.net/th?id=HN.608003696942779811') + self.assertEqual(results[0]['thumbnail_src'], 'https://www.bing.com/th?id=HN.608003696942779811') self.assertEqual(results[0]['img_src'], 'http://test.url/Test%20Query.jpg') html = """ diff --git a/searx/tests/engines/test_swisscows.py b/searx/tests/engines/test_swisscows.py index 926ba885e..3b4ce7b0f 100644 --- a/searx/tests/engines/test_swisscows.py +++ b/searx/tests/engines/test_swisscows.py @@ -23,6 +23,10 @@ class TestSwisscowsEngine(SearxTestCase): self.assertTrue('uiLanguage=browser' in params['url']) self.assertTrue('region=browser' in params['url']) + dicto['category'] = 'images' + params = swisscows.request(query, dicto) + self.assertIn('image', params['url']) + def test_response(self): self.assertRaises(AttributeError, swisscows.response, None) self.assertRaises(AttributeError, swisscows.response, []) diff --git a/searx/tests/engines/test_vimeo.py b/searx/tests/engines/test_vimeo.py index dad7239b4..50b1cb563 100644 --- a/searx/tests/engines/test_vimeo.py +++ b/searx/tests/engines/test_vimeo.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from collections import defaultdict import mock from searx.engines import vimeo @@ -25,26 +26,42 @@ class TestVimeoEngine(SearxTestCase): self.assertEqual(vimeo.response(response), []) html = """ - <div id="browse_content" class="" data-search-id="696d5f8366914ec4ffec33cf7652de384976d4f4"> - <ol class="js-browse_list clearfix browse browse_videos browse_videos_thumbnails kane" + <div id="browse_content" class="results_grid" data-search-id="696d5f8366914ec4ffec33cf7652de384976d4f4"> + <ul class="js-browse_list clearfix browse browse_videos browse_videos_thumbnails kane" data-stream="c2VhcmNoOjo6ZGVzYzp7InF1ZXJ5IjoidGVzdCJ9"> - <li id="clip_100785455" data-start-page="/search/page:1/sort:relevant/" data-position="1"> - <a href="/videoid" title="Futurama 3d (test shot)"> - <img src="http://image.url.webp" - srcset="http://i.vimeocdn.com/video/482375085_590x332.webp 2x" alt="" - class="thumbnail thumbnail_lg_wide"> - <div class="data"> - <p class="title"> - This is the title - </p> - <p class="meta"> - <time datetime="2014-07-15T04:16:27-04:00" - title="mardi 15 juillet 2014 04:16">Il y a 6 mois</time> - </p> - </div> - </a> + <li data-position="7" data-result-id="clip_79600943"> + <div class="clip_thumbnail"> + <a href="/videoid" class="js-result_url"> + <div class="thumbnail_wrapper"> + <img src="http://image.url.webp" class="js-clip_thumbnail_image"> + <div class="overlay overlay_clip_meta"> + <div class="meta_data_footer"> + <span class="clip_upload_date"> + <time datetime="2013-11-17T08:49:09-05:00" + title="dimanche 17 novembre 2013 08:49">Il y a 1 an</time> + </span> + <span class="clip_likes"> + <img src="https://f.vimeocdn.com/images_v6/svg/heart-icon.svg">2 215 + </span> + <span class="clip_comments"> + <img src="https://f.vimeocdn.com/images_v6/svg/comment-icon.svg">75 + </span> + <span class="overlay meta_data_footer clip_duration">01:12</span> + </div> + </div> + </div> + <span class="title">This is the title</span> + </a> + </div> + <div class="clip_thumbnail_attribution"> + <a href="/fedorshmidt"> + <img src="https://i.vimeocdn.com/portrait/6628061_100x100.jpg" class="avatar"> + <span class="display_name">Fedor Shmidt</span> + </a> + <span class="plays">2,1M lectures</span> + </div> </li> - </ol> + </ul> </div> """ response = mock.Mock(text=html) diff --git a/searx/tests/engines/test_www1x.py b/searx/tests/engines/test_www1x.py index ab4f282c1..9df8de6bf 100644 --- a/searx/tests/engines/test_www1x.py +++ b/searx/tests/engines/test_www1x.py @@ -51,7 +51,7 @@ class TestWww1xEngine(SearxTestCase): results = www1x.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) - self.assertEqual(results[0]['url'], 'http://1x.com/photo/123456') - self.assertEqual(results[0]['thumbnail_src'], 'http://1x.com/images/user/testimage-123456.jpg') + self.assertEqual(results[0]['url'], 'https://1x.com/photo/123456') + self.assertEqual(results[0]['thumbnail_src'], 'https://1x.com/images/user/testimage-123456.jpg') self.assertEqual(results[0]['content'], '') self.assertEqual(results[0]['template'], 'images.html') diff --git a/searx/tests/engines/test_yahoo_news.py b/searx/tests/engines/test_yahoo_news.py index 94d819d61..4d7fc0a10 100644 --- a/searx/tests/engines/test_yahoo_news.py +++ b/searx/tests/engines/test_yahoo_news.py @@ -29,6 +29,13 @@ class TestYahooNewsEngine(SearxTestCase): self.assertIn('en', params['cookies']['sB']) self.assertIn('en', params['url']) + def test_sanitize_url(self): + url = "test.url" + self.assertEqual(url, yahoo_news.sanitize_url(url)) + + url = "www.yahoo.com/;_ylt=test" + self.assertEqual("www.yahoo.com/", yahoo_news.sanitize_url(url)) + def test_response(self): self.assertRaises(AttributeError, yahoo_news.response, None) self.assertRaises(AttributeError, yahoo_news.response, []) @@ -57,7 +64,17 @@ class TestYahooNewsEngine(SearxTestCase): This is the content </div> </li> - </div> + <li class="first"> + <div class="compTitle"> + <h3> + <a class="yschttl spt" target="_blank"> + </a> + </h3> + </div> + <div class="compText"> + </div> + </li> + </ol> """ response = mock.Mock(text=html) results = yahoo_news.response(response) diff --git a/searx/tests/engines/test_youtube_noapi.py b/searx/tests/engines/test_youtube_noapi.py index b715ed2f1..9fa8fd20e 100644 --- a/searx/tests/engines/test_youtube_noapi.py +++ b/searx/tests/engines/test_youtube_noapi.py @@ -94,6 +94,57 @@ class TestYoutubeNoAPIEngine(SearxTestCase): html = """ <ol id="item-section-063864" class="item-section"> <li> + <div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile" + data-context-item-id="DIVZCPfAOeM" + data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB"> + <div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto"> + <a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link" + data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"> + <div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg" + width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a> + <span class="thumb-menu dark-overflow-action-menu video-actions"> + </span> + </div> + <div class="yt-lockup-content"> + <h3 class="yt-lockup-title"> + <span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span> + </h3> + <div class="yt-lockup-byline">de + <a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard" + data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA" + data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta"> + <ul class="yt-lockup-meta-info"> + <li>il y a 20 heures</li> + <li>8 424 vues</li> + </ul> + </div> + <div class="yt-lockup-badges"> + <ul class="yt-badge-list "> + <li class="yt-badge-item" > + <span class="yt-badge">Nouveauté</span> + </li> + <li class="yt-badge-item" ><span class="yt-badge " >HD</span></li> + </ul> + </div> + <div class="yt-lockup-action-menu yt-uix-menu-container"> + <div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded" + data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu"> + </div> + </div> + </div> + </div> + </div> + </li> + </ol> + """ + response = mock.Mock(text=html) + results = youtube_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + + html = """ + <ol id="item-section-063864" class="item-section"> + <li> </li> </ol> """ diff --git a/searx/tests/test_plugins.py b/searx/tests/test_plugins.py index 8dcad1142..c5171127c 100644 --- a/searx/tests/test_plugins.py +++ b/searx/tests/test_plugins.py @@ -38,10 +38,11 @@ class SelfIPTest(SearxTestCase): def test_PluginStore_init(self): store = plugins.PluginStore() - store.register(plugins.self_ip) + store.register(plugins.self_info) self.assertTrue(len(store.plugins) == 1) + # IP test request = Mock(user_plugins=store.plugins, remote_addr='127.0.0.1') request.headers.getlist.return_value = [] @@ -49,3 +50,19 @@ class SelfIPTest(SearxTestCase): query='ip')} store.call('post_search', request, ctx) self.assertTrue('127.0.0.1' in ctx['search'].answers) + + # User agent test + request = Mock(user_plugins=store.plugins, + user_agent='Mock') + request.headers.getlist.return_value = [] + ctx = {'search': Mock(answers=set(), + query='user-agent')} + store.call('post_search', request, ctx) + self.assertTrue('Mock' in ctx['search'].answers) + ctx = {'search': Mock(answers=set(), + query='user agent')} + store.call('post_search', request, ctx) + self.assertTrue('Mock' in ctx['search'].answers) + ctx = {'search': Mock(answers=set(), + query='What is my User-Agent?')} + store.call('post_search', request, ctx) diff --git a/searx/tests/test_search.py b/searx/tests/test_search.py new file mode 100644 index 000000000..89d0b620d --- /dev/null +++ b/searx/tests/test_search.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- + +from searx.search import score_results +from searx.testing import SearxTestCase + + +def fake_result(url='https://aa.bb/cc?dd=ee#ff', + title='aaa', + content='bbb', + engine='wikipedia'): + return {'url': url, + 'title': title, + 'content': content, + 'engine': engine} + + +class ScoreResultsTestCase(SearxTestCase): + + def test_empty(self): + self.assertEqual(score_results(dict()), []) + + def test_urlparse(self): + results = score_results(dict(a=[fake_result(url='https://aa.bb/cc?dd=ee#ff')])) + parsed_url = results[0]['parsed_url'] + self.assertEqual(parsed_url.query, 'dd=ee') diff --git a/searx/webapp.py b/searx/webapp.py index dbcbb4c0d..fb7157b47 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -697,7 +697,7 @@ def image_proxy(): return '', 400 if not resp.headers.get('content-type', '').startswith('image/'): - logger.debug('image-proxy: wrong content-type: {0}'.format(resp.get('content-type'))) + logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type'))) return '', 400 img = '' @@ -781,10 +781,45 @@ def run(): ) -application = app +class ReverseProxyPathFix(object): + '''Wrap the application in this middleware and configure the + front-end server to add these headers, to let you quietly bind + this to a URL other than / and to an HTTP scheme that is + different than what is used locally. + + http://flask.pocoo.org/snippets/35/ + + In nginx: + location /myprefix { + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Scheme $scheme; + proxy_set_header X-Script-Name /myprefix; + } + + :param app: the WSGI application + ''' + def __init__(self, app): + self.app = app -app.wsgi_app = ProxyFix(application.wsgi_app) + def __call__(self, environ, start_response): + script_name = environ.get('HTTP_X_SCRIPT_NAME', '') + if script_name: + environ['SCRIPT_NAME'] = script_name + path_info = environ['PATH_INFO'] + if path_info.startswith(script_name): + environ['PATH_INFO'] = path_info[len(script_name):] + scheme = environ.get('HTTP_X_SCHEME', '') + if scheme: + environ['wsgi.url_scheme'] = scheme + return self.app(environ, start_response) + + +application = app +# patch app to handle non root url-s behind proxy & wsgi +app.wsgi_app = ReverseProxyPathFix(ProxyFix(application.wsgi_app)) if __name__ == "__main__": run() |