48 files changed, 2379 insertions(+), 247 deletions(-)
diff --git a/AUTHORS.rst b/AUTHORS.rst index c5047438a..3605332ea 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -42,3 +42,5 @@ generally made searx better: - Noemi Vanyi - Kang-min Liu - Kirill Isakov +- Guilhem Bonnefille +- Marc Abonce Seguin diff --git a/README.rst b/README.rst index c0993157d..6563fe8ab 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ Installation ``git clone git@github.com:asciimoo/searx.git && cd searx`` - install dependencies: ``./manage.sh update_packages`` - edit your - `settings.yml <https://github.com/asciimoo/searx/blob/master/settings.yml>`__ + `settings.yml <https://github.com/asciimoo/searx/blob/master/searx/settings.yml>`__ (set your ``secret_key``!) - run ``python searx/webapp.py`` to start the application diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 26830a167..b0ffb490a 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -9,13 +9,13 @@ categories = [] url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' weight = 100 -parser_re = re.compile(u'^\W*(\d+(?:\.\d+)?)\W*([^.0-9].+)\W+in?\W+([^\.]+)\W*$', re.I) # noqa +parser_re = re.compile(u'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa db = 1 def normalize_name(name): - name = name.lower().replace('-', ' ') + name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() diff --git a/searx/engines/doku.py b/searx/engines/doku.py new file mode 100644 index 000000000..93867fd0d --- /dev/null +++ b/searx/engines/doku.py @@ -0,0 +1,84 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'http://localhost:8090' +search_url = '/?do=search'\ + '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'id': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): + try: + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + + # append result + results.append({'title': title, + 'content': "", + 'url': base_url + res_url}) + + # Search results + for r in doc.xpath('//dl[@class="search_results"]/*'): + try: + if r.tag == "dt": + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + elif r.tag == "dd": + content = extract_text(r.xpath('.')) + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url}) + except: + continue + + if not res_url: + continue + + # return results + return results diff --git a/searx/engines/duckduckgo_definitions.py 
b/searx/engines/duckduckgo_definitions.py index 793e97d22..208ccca28 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,5 +1,6 @@ import json from urllib import urlencode +from re import compile, sub from lxml import html from searx.utils import html_to_text from searx.engines.xpath import extract_text @@ -7,6 +8,8 @@ from searx.engines.xpath import extract_text url = 'https://api.duckduckgo.com/'\ + '?{query}&format=json&pretty=0&no_redirect=1&d=1' +http_regex = compile(r'^http:') + def result_to_text(url, text, htmlResult): # TODO : remove result ending with "Meaning" or "Category" @@ -19,8 +22,8 @@ def result_to_text(url, text, htmlResult): def request(query, params): - # TODO add kl={locale} params['url'] = url.format(query=urlencode({'q': query})) + params['headers']['Accept-Language'] = params['language'] return params @@ -103,6 +106,10 @@ def response(resp): urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL}) + # to merge with wikidata's infobox + if infobox_id: + infobox_id = http_regex.sub('https:', infobox_id) + # entity entity = search_res.get('Entity', None) # TODO continent / country / department / location / waterfall / diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py new file mode 100644 index 000000000..0b16773e3 --- /dev/null +++ b/searx/engines/fdroid.py @@ -0,0 +1,53 @@ +""" + F-Droid (a repository of FOSS applications for Android) + + @website https://f-droid.org/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content +""" + +from cgi import escape +from urllib import urlencode +from searx.engines.xpath import extract_text +from lxml import html + +# engine dependent config +categories = ['files'] +paging = True + +# search-url +base_url = 'https://f-droid.org/' +search_url = base_url + 'repository/browse/?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'fdfilter': query, + 'fdpage': params['pageno']}) + params['url'] = search_url.format(query=query) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for app in dom.xpath('//div[@id="appheader"]'): + url = app.xpath('./ancestor::a/@href')[0] + title = app.xpath('./p/span/text()')[0] + img_src = app.xpath('.//img/@src')[0] + + content = extract_text(app.xpath('./p')[0]) + content = escape(content.replace(title, '', 1).strip()) + + results.append({'url': url, + 'title': title, + 'content': content, + 'img_src': img_src}) + + return results diff --git a/searx/engines/google.py b/searx/engines/google.py index 313932200..6018ad1b2 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -46,11 +46,11 @@ country_to_hostname = { 'NZ': 'www.google.co.nz', # New Zealand 'PH': 'www.google.com.ph', # Philippines 'SG': 'www.google.com.sg', # Singapore - # 'US': 'www.google.us', # United State, redirect to .com + # 'US': 'www.google.us', # United States, redirect to .com 'ZA': 'www.google.co.za', # South Africa 'AR': 'www.google.com.ar', # Argentina 'CL': 'www.google.cl', # Chile - 'ES': 'www.google.es', # Span + 'ES': 'www.google.es', # Spain 'MX': 'www.google.com.mx', # Mexico 'EE': 'www.google.ee', # Estonia 'FI': 'www.google.fi', # Finland @@ -61,7 +61,7 @@ country_to_hostname = { 'HU': 'www.google.hu', # Hungary 'IT': 'www.google.it', # Italy 'JP': 'www.google.co.jp', # Japan - 'KR': 'www.google.co.kr', # South Korean + 'KR': 'www.google.co.kr', # 
South Korea 'LT': 'www.google.lt', # Lithuania 'LV': 'www.google.lv', # Latvia 'NO': 'www.google.no', # Norway @@ -76,9 +76,9 @@ country_to_hostname = { 'SE': 'www.google.se', # Sweden 'TH': 'www.google.co.th', # Thailand 'TR': 'www.google.com.tr', # Turkey - 'UA': 'www.google.com.ua', # Ikraine - # 'CN': 'www.google.cn', # China, only from china ? - 'HK': 'www.google.com.hk', # Hong kong + 'UA': 'www.google.com.ua', # Ukraine + # 'CN': 'www.google.cn', # China, only from China ? + 'HK': 'www.google.com.hk', # Hong Kong 'TW': 'www.google.com.tw' # Taiwan } diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 9d51428cc..efe46812a 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -49,8 +49,6 @@ def response(resp): # parse results for result in dom.xpath('//div[@data-ved]'): - data_url = result.xpath('./a/@href')[0] - data_query = {k: v[0] for k, v in parse_qs(data_url.split('?', 1)[1]).iteritems()} metadata = loads(result.xpath('./div[@class="rg_meta"]/text()')[0]) @@ -60,11 +58,11 @@ def response(resp): thumbnail_src = thumbnail_src.replace("http://", "https://") # append result - results.append({'url': data_query['imgrefurl'], + results.append({'url': metadata['ru'], 'title': metadata['pt'], 'content': metadata['s'], - 'thumbnail_src': metadata['tu'], - 'img_src': data_query['imgurl'], + 'thumbnail_src': thumbnail_src, + 'img_src': metadata['ou'], 'template': 'images.html'}) # return results diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py new file mode 100644 index 000000000..cda8231f7 --- /dev/null +++ b/searx/engines/nyaa.py @@ -0,0 +1,119 @@ +""" + Nyaa.se (Anime Bittorrent tracker) + + @website http://www.nyaa.se/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content, seed, leech, torrentfile +""" + +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['files', 'images', 'videos', 'music'] +paging = True + +# search-url +base_url = 'http://www.nyaa.se/' +search_url = base_url + '?page=search&{query}&offset={offset}' + +# xpath queries +xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' +xpath_category = './/td[@class="tlisticon"]/a' +xpath_title = './/td[@class="tlistname"]/a' +xpath_torrent_file = './/td[@class="tlistdownload"]/a' +xpath_filesize = './/td[@class="tlistsize"]/text()' +xpath_seeds = './/td[@class="tlistsn"]/text()' +xpath_leeches = './/td[@class="tlistln"]/text()' +xpath_downloads = './/td[@class="tlistdn"]/text()' + + +# convert a variable to integer or return 0 if it's not a number +def int_or_zero(num): + if isinstance(num, list): + if len(num) < 1: + return 0 + num = num[0] + if num.isdigit(): + return int(num) + return 0 + + +# get multiplier to convert torrent size to bytes +def get_filesize_mul(suffix): + return { + 'KB': 1024, + 'MB': 1024 ** 2, + 'GB': 1024 ** 3, + 'TB': 1024 ** 4, + + 'KIB': 1024, + 'MIB': 1024 ** 2, + 'GIB': 1024 ** 3, + 'TIB': 1024 ** 4 + }[str(suffix).upper()] + + +# do search-request +def request(query, params): + query = urlencode({'term': query}) + params['url'] = search_url.format(query=query, offset=params['pageno']) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath(xpath_results): + # category in which our torrent belongs + category = 
result.xpath(xpath_category)[0].attrib.get('title') + + # torrent title + page_a = result.xpath(xpath_title)[0] + title = escape(extract_text(page_a)) + + # link to the page + href = page_a.attrib.get('href') + + # link to the torrent file + torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href') + + # torrent size + try: + file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') + file_size = int(float(file_size) * get_filesize_mul(suffix)) + except Exception as e: + file_size = None + + # seed count + seed = int_or_zero(result.xpath(xpath_seeds)) + + # leech count + leech = int_or_zero(result.xpath(xpath_leeches)) + + # torrent downloads count + downloads = int_or_zero(result.xpath(xpath_downloads)) + + # content string contains all information not included into template + content = 'Category: "{category}". Downloaded {downloads} times.' + content = content.format(category=category, downloads=downloads) + content = escape(content) + + results.append({'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': file_size, + 'torrentfile': torrent_link, + 'template': 'torrent.html'}) + + return results diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py new file mode 100644 index 000000000..3ca7e44f6 --- /dev/null +++ b/searx/engines/reddit.py @@ -0,0 +1,79 @@ +""" + Reddit + + @website https://www.reddit.com/ + @provide-api yes (https://www.reddit.com/dev/api) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, thumbnail, publishedDate +""" + +import json +from cgi import escape +from urllib import urlencode +from urlparse import urlparse, urljoin +from datetime import datetime + +# engine dependent config +categories = ['general', 'images', 'news', 'social media'] +page_size = 25 + +# search-url +base_url = 'https://www.reddit.com/' +search_url = base_url + 'search.json?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'q': query, + 'limit': page_size}) + params['url'] = search_url.format(query=query) + + return params + + +# get response from search-request +def response(resp): + img_results = [] + text_results = [] + + search_results = json.loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + posts = search_results.get('data', {}).get('children', []) + + # process results + for post in posts: + data = post['data'] + + # extract post information + params = { + 'url': urljoin(base_url, data['permalink']), + 'title': data['title'] + } + + # if thumbnail field contains a valid URL, we need to change template + thumbnail = data['thumbnail'] + url_info = urlparse(thumbnail) + # netloc & path + if url_info[1] != '' and url_info[2] != '': + params['img_src'] = data['url'] + params['thumbnail_src'] = thumbnail + params['template'] = 'images.html' + img_results.append(params) + else: + created = datetime.fromtimestamp(data['created_utc']) + content = escape(data['selftext']) + if len(content) > 500: + content = content[:500] + '...' 
+ params['content'] = content + params['publishedDate'] = created + text_results.append(params) + + # show images first and text results second + return img_results + text_results diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py new file mode 100644 index 000000000..17e8e2191 --- /dev/null +++ b/searx/engines/tokyotoshokan.py @@ -0,0 +1,102 @@ +""" + Tokyo Toshokan (A BitTorrent Library for Japanese Media) + + @website https://www.tokyotosho.info/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, publishedDate, seed, leech, + filesize, magnetlink, content +""" + +import re +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text +from datetime import datetime +from searx.engines.nyaa import int_or_zero, get_filesize_mul + +# engine dependent config +categories = ['files', 'videos', 'music'] +paging = True + +# search-url +base_url = 'https://www.tokyotosho.info/' +search_url = base_url + 'search.php?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'page': params['pageno'], + 'terms': query}) + params['url'] = search_url.format(query=query) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + rows = dom.xpath('//table[@class="listing"]//tr[contains(@class, "category_0")]') + + # check if there are no results or page layout was changed so we cannot parse it + # currently there are two rows for each result, so total count must be even + if len(rows) == 0 or len(rows) % 2 != 0: + return [] + + # regular expression for parsing torrent size strings + size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) + + # processing the results, two rows at a time + for i in xrange(0, len(rows), 2): + # parse the first row + name_row = rows[i] + + links = name_row.xpath('./td[@class="desc-top"]/a') + params = { + 'template': 'torrent.html', + 'url': links[-1].attrib.get('href'), + 'title': extract_text(links[-1]) + } + # I have not yet seen any torrents without magnet links, but + # it's better to be prepared to stumble upon one some day + if len(links) == 2: + magnet = links[0].attrib.get('href') + if magnet.startswith('magnet'): + # okay, we have a valid magnet link, let's add it to the result + params['magnetlink'] = magnet + + # no more info in the first row, start parsing the second one + info_row = rows[i + 1] + desc = extract_text(info_row.xpath('./td[@class="desc-bot"]')[0]) + for item in desc.split('|'): + item = item.strip() + if item.startswith('Size:'): + try: + # ('1.228', 'GB') + groups = size_re.match(item).groups() + multiplier = get_filesize_mul(groups[1]) + params['filesize'] = int(multiplier * float(groups[0])) + except Exception as e: + pass + elif item.startswith('Date:'): + try: + # Date: 2016-02-21 21:44 UTC + date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC') + params['publishedDate'] = date + except Exception as e: + pass + elif item.startswith('Comment:'): + params['content'] = item + stats = info_row.xpath('./td[@class="stats"]/span') + # has the layout not changed yet? 
+ if len(stats) == 3: + params['seed'] = int_or_zero(extract_text(stats[0])) + params['leech'] = int_or_zero(extract_text(stats[1])) + + results.append(params) + + return results diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py new file mode 100644 index 000000000..92fbe7013 --- /dev/null +++ b/searx/engines/torrentz.py @@ -0,0 +1,93 @@ +""" + Torrentz.eu (BitTorrent meta-search engine) + + @website https://torrentz.eu/ + @provide-api no + + @using-api no + @results HTML + @stable no (HTML can change, although unlikely, + see https://torrentz.eu/torrentz.btsearch) + @parse url, title, publishedDate, seed, leech, filesize, magnetlink +""" + +import re +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text +from datetime import datetime +from searx.engines.nyaa import int_or_zero, get_filesize_mul + +# engine dependent config +categories = ['files', 'videos', 'music'] +paging = True + +# search-url +# https://torrentz.eu/search?f=EXAMPLE&p=6 +base_url = 'https://torrentz.eu/' +search_url = base_url + 'search?{query}' + + +# do search-request +def request(query, params): + page = params['pageno'] - 1 + query = urlencode({'q': query, 'p': page}) + params['url'] = search_url.format(query=query) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath('//div[@class="results"]/dl'): + name_cell = result.xpath('./dt')[0] + title = extract_text(name_cell) + + # skip rows that do not contain a link to a torrent + links = name_cell.xpath('./a') + if len(links) != 1: + continue + + # extract url and remove a slash in the beginning + link = links[0].attrib.get('href').lstrip('/') + + seed = result.xpath('./dd/span[@class="u"]/text()')[0].replace(',', '') + leech = result.xpath('./dd/span[@class="d"]/text()')[0].replace(',', '') + + params = { + 'url': base_url + link, + 'title': title, + 'seed': int_or_zero(seed), + 'leech': int_or_zero(leech), + 'template': 'torrent.html' + } + + # let's try to calculate the torrent size + try: + size_str = result.xpath('./dd/span[@class="s"]/text()')[0] + size, suffix = size_str.split() + params['filesize'] = int(size) * get_filesize_mul(suffix) + except Exception as e: + pass + + # does our link contain a valid SHA1 sum? 
+ if re.compile('[0-9a-fA-F]{40}').match(link): + # add a magnet link to the result + params['magnetlink'] = 'magnet:?xt=urn:btih:' + link + + # extract and convert creation date + try: + date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title') + # Fri, 25 Mar 2016 16:29:01 + date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S') + params['publishedDate'] = date + except Exception as e: + pass + + results.append(params) + + return results diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 9f3496b72..8aa2fcd5c 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -86,15 +86,15 @@ def getDetail(jsonresponse, wikidata_id, language, locale): results.append({'title': title, 'url': official_website}) wikipedia_link_count = 0 + wikipedia_link = get_wikilink(result, language + 'wiki') + wikipedia_link_count += add_url(urls, + 'Wikipedia (' + language + ')', + wikipedia_link) if language != 'en': + wikipedia_en_link = get_wikilink(result, 'enwiki') wikipedia_link_count += add_url(urls, - 'Wikipedia (' + language + ')', - get_wikilink(result, language + - 'wiki')) - wikipedia_en_link = get_wikilink(result, 'enwiki') - wikipedia_link_count += add_url(urls, - 'Wikipedia (en)', - wikipedia_en_link) + 'Wikipedia (en)', + wikipedia_en_link) if wikipedia_link_count == 0: misc_language = get_wiki_firstlanguage(result, 'wiki') if misc_language is not None: @@ -188,7 +188,7 @@ def getDetail(jsonresponse, wikidata_id, language, locale): else: results.append({ 'infobox': title, - 'id': wikipedia_en_link, + 'id': wikipedia_link, 'content': description, 'attributes': attributes, 'urls': urls diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py new file mode 100644 index 000000000..fed7b263f --- /dev/null +++ b/searx/engines/wikipedia.py @@ -0,0 +1,114 @@ +""" + Wikipedia (Web) + + @website https://{language}.wikipedia.org + @provide-api yes + + @using-api yes + @results JSON + @stable yes + @parse url, infobox +""" + +from json import loads +from urllib import urlencode, quote + +# search-url +base_url = 'https://{language}.wikipedia.org/' +search_postfix = 'w/api.php?'\ + 'action=query'\ + '&format=json'\ + '&{query}'\ + '&prop=extracts|pageimages'\ + '&exintro'\ + '&explaintext'\ + '&pithumbsize=300'\ + '&redirects' + + +# set language in base_url +def url_lang(lang): + if lang == 'all': + language = 'en' + else: + language = lang.split('_')[0] + + return base_url.format(language=language) + + +# do search-request +def request(query, params): + if query.islower(): + query += '|' + query.title() + + params['url'] = url_lang(params['language']) \ + + search_postfix.format(query=urlencode({'titles': query})) + + return params + + +# get first meaningful paragraph +# this should filter out disambiguation pages and notes above first paragraph +# "magic numbers" were obtained by fine tuning +def extract_first_paragraph(content, title, image): + first_paragraph = None + + failed_attempts = 0 + for paragraph in content.split('\n'): + + starts_with_title = paragraph.lower().find(title.lower(), 0, len(title) + 35) + length = len(paragraph) + + if length >= 200 or (starts_with_title >= 0 and (image or length >= 150)): + first_paragraph = paragraph + break + + failed_attempts += 1 + if failed_attempts > 3: + return None + + return first_paragraph + + +# get response from search-request +def response(resp): + results = [] + + search_result = loads(resp.content) + + # wikipedia article's unique id + # first valid id is assumed to be the 
requested article + for article_id in search_result['query']['pages']: + page = search_result['query']['pages'][article_id] + if int(article_id) > 0: + break + + if int(article_id) < 0: + return [] + + title = page.get('title') + + image = page.get('thumbnail') + if image: + image = image.get('source') + + extract = page.get('extract') + + summary = extract_first_paragraph(extract, title, image) + if not summary: + return [] + + # link to wikipedia article + # parenthesis are not quoted to make infobox mergeable with wikidata's + wikipedia_link = url_lang(resp.search_params['language']) \ + + 'wiki/' + quote(title.replace(' ', '_').encode('utf8')).replace('%28', '(').replace('%29', ')') + + results.append({'url': wikipedia_link, 'title': title}) + + results.append({'infobox': title, + 'id': wikipedia_link, + 'content': summary, + 'img_src': image, + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]}) + + return results diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index f51634be0..e701c02bf 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -11,6 +11,14 @@ title_xpath = None suggestion_xpath = '' results_xpath = '' +# parameters for engines with paging support +# +# number of results on each page +# (only needed if the site requires not a page number, but an offset) +page_size = 1 +# number of the first page (usually 0 or 1) +first_page_num = 1 + ''' if xpath_results is list, extract the text from each result and concat the list @@ -76,8 +84,14 @@ def normalize_url(url): def request(query, params): query = urlencode({'q': query})[2:] - params['url'] = search_url.format(query=query) + + fp = {'query': query} + if paging and search_url.find('{pageno}') >= 0: + fp['pageno'] = (params['pageno'] + first_page_num - 1) * page_size + + params['url'] = search_url.format(**fp) params['query'] = query + return params diff --git a/searx/languages.py b/searx/languages.py index b67da9d22..70459a577 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -20,10 +20,10 @@ language_codes = ( ("ar_XA", "Arabic", "Arabia"), ("bg_BG", "Bulgarian", "Bulgaria"), ("cs_CZ", "Czech", "Czech Republic"), - ("de_DE", "German", "Germany"), ("da_DK", "Danish", "Denmark"), ("de_AT", "German", "Austria"), ("de_CH", "German", "Switzerland"), + ("de_DE", "German", "Germany"), ("el_GR", "Greek", "Greece"), ("en_AU", "English", "Australia"), ("en_CA", "English", "Canada"), @@ -58,10 +58,10 @@ language_codes = ( ("ko_KR", "Korean", "Korea"), ("lt_LT", "Lithuanian", "Lithuania"), ("lv_LV", "Latvian", "Latvia"), - ("oc_OC", "Occitan", "Occitan"), ("nb_NO", "Norwegian", "Norway"), ("nl_BE", "Dutch", "Belgium"), ("nl_NL", "Dutch", "Netherlands"), + ("oc_OC", "Occitan", "Occitan"), ("pl_PL", "Polish", "Poland"), ("pt_BR", "Portuguese", "Brazil"), ("pt_PT", "Portuguese", "Portugal"), diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 87cc01382..efb9b0682 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -23,7 +23,8 @@ from searx.plugins import (https_rewrite, open_results_on_new_tab, self_info, search_on_category_select, - tracker_url_remover) + tracker_url_remover, + vim_hotkeys) required_attrs = (('name', str), ('description', str), @@ -77,3 +78,4 @@ plugins.register(open_results_on_new_tab) plugins.register(self_info) plugins.register(search_on_category_select) plugins.register(tracker_url_remover) +plugins.register(vim_hotkeys) diff --git a/searx/plugins/vim_hotkeys.py b/searx/plugins/vim_hotkeys.py new file mode 100644 index 
000000000..e537a3ac8 --- /dev/null +++ b/searx/plugins/vim_hotkeys.py @@ -0,0 +1,10 @@ +from flask.ext.babel import gettext + +name = gettext('Vim-like hotkeys') +description = gettext('Navigate search results with Vim-like hotkeys ' + '(JavaScript required). ' + 'Press "h" key on main or result page to get help.') +default_on = False + +js_dependencies = ('plugins/js/vim_hotkeys.js',) +css_dependencies = ('plugins/css/vim_hotkeys.css',) diff --git a/searx/preferences.py b/searx/preferences.py new file mode 100644 index 000000000..a87cd5029 --- /dev/null +++ b/searx/preferences.py @@ -0,0 +1,271 @@ +from searx import settings, autocomplete +from searx.languages import language_codes as languages + + +COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years +LANGUAGE_CODES = [l[0] for l in languages] +LANGUAGE_CODES.append('all') +DISABLED = 0 +ENABLED = 1 + + +class MissingArgumentException(Exception): + pass + + +class ValidationException(Exception): + pass + + +class Setting(object): + """Base class of user settings""" + + def __init__(self, default_value, **kwargs): + super(Setting, self).__init__() + self.value = default_value + for key, value in kwargs.iteritems(): + setattr(self, key, value) + + self._post_init() + + def _post_init(self): + pass + + def parse(self, data): + self.value = data + + def get_value(self): + return self.value + + def save(self, name, resp): + resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE) + + +class StringSetting(Setting): + """Setting of plain string values""" + pass + + +class EnumStringSetting(Setting): + """Setting of a value which can only come from the given choices""" + + def _post_init(self): + if not hasattr(self, 'choices'): + raise MissingArgumentException('Missing argument: choices') + + if self.value != '' and self.value not in self.choices: + raise ValidationException('Invalid default value: {0}'.format(self.value)) + + def parse(self, data): + if data not in self.choices and data != self.value: + raise ValidationException('Invalid choice: {0}'.format(data)) + self.value = data + + +class MultipleChoiceSetting(EnumStringSetting): + """Setting of values which can only come from the given choices""" + + def _post_init(self): + if not hasattr(self, 'choices'): + raise MissingArgumentException('Missing argument: choices') + for item in self.value: + if item not in self.choices: + raise ValidationException('Invalid default value: {0}'.format(self.value)) + + def parse(self, data): + if data == '': + self.value = [] + return + + elements = data.split(',') + for item in elements: + if item not in self.choices: + raise ValidationException('Invalid choice: {0}'.format(item)) + self.value = elements + + def parse_form(self, data): + self.value = [] + for choice in data: + if choice in self.choices and choice not in self.value: + self.value.append(choice) + + def save(self, name, resp): + resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) + + +class MapSetting(Setting): + """Setting of a value that has to be translated in order to be storable""" + + def _post_init(self): + if not hasattr(self, 'map'): + raise MissingArgumentException('missing argument: map') + if self.value not in self.map.values(): + raise ValidationException('Invalid default value') + + def parse(self, data): + if data not in self.map: + raise ValidationException('Invalid choice: {0}'.format(data)) + self.value = self.map[data] + self.key = data + + def save(self, name, resp): + resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE) + + +class 
SwitchableSetting(Setting): + """ Base class for settings that can be turned on && off""" + + def _post_init(self): + self.disabled = set() + self.enabled = set() + if not hasattr(self, 'choices'): + raise MissingArgumentException('missing argument: choices') + + def transform_form_items(self, items): + return items + + def transform_values(self, values): + return values + + def parse_cookie(self, data): + if data[DISABLED] != '': + self.disabled = set(data[DISABLED].split(',')) + if data[ENABLED] != '': + self.enabled = set(data[ENABLED].split(',')) + + def parse_form(self, items): + items = self.transform_form_items(items) + + self.disabled = set() + self.enabled = set() + for choice in self.choices: + if choice['default_on']: + if choice['id'] in items: + self.disabled.add(choice['id']) + else: + if choice['id'] not in items: + self.enabled.add(choice['id']) + + def save(self, resp): + resp.set_cookie('disabled_{0}'.format(self.value), ','.join(self.disabled), max_age=COOKIE_MAX_AGE) + resp.set_cookie('enabled_{0}'.format(self.value), ','.join(self.enabled), max_age=COOKIE_MAX_AGE) + + def get_disabled(self): + disabled = self.disabled + for choice in self.choices: + if not choice['default_on'] and choice['id'] not in self.enabled: + disabled.add(choice['id']) + return self.transform_values(disabled) + + def get_enabled(self): + enabled = self.enabled + for choice in self.choices: + if choice['default_on'] and choice['id'] not in self.disabled: + enabled.add(choice['id']) + return self.transform_values(enabled) + + +class EnginesSetting(SwitchableSetting): + def _post_init(self): + super(EnginesSetting, self)._post_init() + transformed_choices = [] + for engine_name, engine in self.choices.iteritems(): + for category in engine.categories: + transformed_choice = dict() + transformed_choice['default_on'] = not engine.disabled + transformed_choice['id'] = '{}__{}'.format(engine_name, category) + transformed_choices.append(transformed_choice) + self.choices = transformed_choices + + def transform_form_items(self, items): + return [item[len('engine_'):].replace('_', ' ').replace(' ', '__') for item in items] + + def transform_values(self, values): + if len(values) == 1 and next(iter(values)) == '': + return list() + transformed_values = [] + for value in values: + engine, category = value.split('__') + transformed_values.append((engine, category)) + return transformed_values + + +class PluginsSetting(SwitchableSetting): + def _post_init(self): + super(PluginsSetting, self)._post_init() + transformed_choices = [] + for plugin in self.choices: + transformed_choice = dict() + transformed_choice['default_on'] = plugin.default_on + transformed_choice['id'] = plugin.id + transformed_choices.append(transformed_choice) + self.choices = transformed_choices + + def transform_form_items(self, items): + return [item[len('plugin_'):] for item in items] + + +class Preferences(object): + """Stores, validates and saves preferences to cookies""" + + def __init__(self, themes, categories, engines, plugins): + super(Preferences, self).__init__() + + self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories), + 'language': EnumStringSetting('all', choices=LANGUAGE_CODES), + 'locale': EnumStringSetting(settings['ui']['default_locale'], + choices=settings['locales'].keys()), + 'autocomplete': EnumStringSetting(settings['search']['autocomplete'], + choices=autocomplete.backends.keys()), + 'image_proxy': MapSetting(settings['server']['image_proxy'], + map={'': 
settings['server']['image_proxy'], + '0': False, + '1': True}), + 'method': EnumStringSetting('POST', choices=('GET', 'POST')), + 'safesearch': MapSetting(settings['search']['safe_search'], map={'0': 0, + '1': 1, + '2': 2}), + 'theme': EnumStringSetting(settings['ui']['default_theme'], choices=themes)} + + self.engines = EnginesSetting('engines', choices=engines) + self.plugins = PluginsSetting('plugins', choices=plugins) + + def parse_cookies(self, input_data): + for user_setting_name, user_setting in input_data.iteritems(): + if user_setting_name in self.key_value_settings: + self.key_value_settings[user_setting_name].parse(user_setting) + elif user_setting_name == 'disabled_engines': + self.engines.parse_cookie((input_data.get('disabled_engines', ''), + input_data.get('enabled_engines', ''))) + elif user_setting_name == 'disabled_plugins': + self.plugins.parse_cookie((input_data.get('disabled_plugins', ''), + input_data.get('enabled_plugins', ''))) + + def parse_form(self, input_data): + disabled_engines = [] + enabled_categories = [] + disabled_plugins = [] + for user_setting_name, user_setting in input_data.iteritems(): + if user_setting_name in self.key_value_settings: + self.key_value_settings[user_setting_name].parse(user_setting) + elif user_setting_name.startswith('engine_'): + disabled_engines.append(user_setting_name) + elif user_setting_name.startswith('category_'): + enabled_categories.append(user_setting_name[len('category_'):]) + elif user_setting_name.startswith('plugin_'): + disabled_plugins.append(user_setting_name) + self.key_value_settings['categories'].parse_form(enabled_categories) + self.engines.parse_form(disabled_engines) + self.plugins.parse_form(disabled_plugins) + + # cannot be used in case of engines or plugins + def get_value(self, user_setting_name): + if user_setting_name in self.key_value_settings: + return self.key_value_settings[user_setting_name].get_value() + + def save(self, resp): + for user_setting_name, user_setting in self.key_value_settings.iteritems(): + user_setting.save(user_setting_name, resp) + self.engines.save(resp) + self.plugins.save(resp) + return resp diff --git a/searx/query.py b/searx/query.py index e79e760a3..3d617ab05 100644 --- a/searx/query.py +++ b/searx/query.py @@ -28,12 +28,12 @@ import re class Query(object): """parse query""" - def __init__(self, query, blocked_engines): + def __init__(self, query, disabled_engines): self.query = query - self.blocked_engines = [] + self.disabled_engines = [] - if blocked_engines: - self.blocked_engines = blocked_engines + if disabled_engines: + self.disabled_engines = disabled_engines self.query_parts = [] self.engines = [] @@ -107,7 +107,7 @@ class Query(object): self.engines.extend({'category': prefix, 'name': engine.name} for engine in categories[prefix] - if (engine.name, prefix) not in self.blocked_engines) + if (engine.name, prefix) not in self.disabled_engines) if query_part[0] == '!': self.specific = True diff --git a/searx/results.py b/searx/results.py index 5d51eb5b5..c3040b305 100644 --- a/searx/results.py +++ b/searx/results.py @@ -37,7 +37,7 @@ def merge_two_infoboxes(infobox1, infobox2): urls1 = infobox1.get('urls', None) if urls1 is None: urls1 = [] - infobox1.set('urls', urls1) + infobox1['urls'] = urls1 urlSet = set() for url in infobox1.get('urls', []): @@ -47,11 +47,17 @@ def merge_two_infoboxes(infobox1, infobox2): if url.get('url', None) not in urlSet: urls1.append(url) + if 'img_src' in infobox2: + img1 = infobox1.get('img_src', None) + img2 = infobox2.get('img_src') + 
if img1 is None: + infobox1['img_src'] = img2 + if 'attributes' in infobox2: attributes1 = infobox1.get('attributes', None) if attributes1 is None: attributes1 = [] - infobox1.set('attributes', attributes1) + infobox1['attributes'] = attributes1 attributeSet = set() for attribute in infobox1.get('attributes', []): @@ -68,7 +74,7 @@ def merge_two_infoboxes(infobox1, infobox2): if result_content_len(content2) > result_content_len(content1): infobox1['content'] = content2 else: - infobox1.set('content', content2) + infobox1['content'] = content2 def result_score(result): diff --git a/searx/search.py b/searx/search.py index ce41b231b..a40801640 100644 --- a/searx/search.py +++ b/searx/search.py @@ -23,7 +23,7 @@ from searx.engines import ( categories, engines ) from searx.languages import language_codes -from searx.utils import gen_useragent, get_blocked_engines +from searx.utils import gen_useragent from searx.query import Query from searx.results import ResultContainer from searx import logger @@ -140,15 +140,13 @@ class Search(object): self.lang = 'all' # set blocked engines - self.blocked_engines = get_blocked_engines(engines, request.cookies) + self.disabled_engines = request.preferences.engines.get_disabled() self.result_container = ResultContainer() self.request_data = {} # set specific language if set - if request.cookies.get('language')\ - and request.cookies['language'] in (x[0] for x in language_codes): - self.lang = request.cookies['language'] + self.lang = request.preferences.get_value('language') # set request method if request.method == 'POST': @@ -169,7 +167,7 @@ class Search(object): # parse query, if tags are set, which change # the serch engine or search-language - query_obj = Query(self.request_data['q'], self.blocked_engines) + query_obj = Query(self.request_data['q'], self.disabled_engines) query_obj.parse_query() # set query @@ -229,8 +227,7 @@ class Search(object): # using user-defined default-configuration which # (is stored in cookie) if not self.categories: - cookie_categories = request.cookies.get('categories', '') - cookie_categories = cookie_categories.split(',') + cookie_categories = request.preferences.get_value('categories') for ccateg in cookie_categories: if ccateg in categories: self.categories.append(ccateg) @@ -246,7 +243,7 @@ class Search(object): self.engines.extend({'category': categ, 'name': engine.name} for engine in categories[categ] - if (engine.name, categ) not in self.blocked_engines) + if (engine.name, categ) not in self.disabled_engines) # remove suspended engines self.engines = [e for e in self.engines @@ -294,11 +291,8 @@ class Search(object): else: request_params['language'] = self.lang - try: - # 0 = None, 1 = Moderate, 2 = Strict - request_params['safesearch'] = int(request.cookies.get('safesearch')) - except Exception: - request_params['safesearch'] = settings['search']['safe_search'] + # 0 = None, 1 = Moderate, 2 = Strict + request_params['safesearch'] = request.preferences.get_value('safesearch') # update request parameters dependent on # search-engine (contained in engines folder) diff --git a/searx/settings.yml b/searx/settings.yml index c388f55ec..96455fc23 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -45,10 +45,9 @@ engines: shortcut : bs - name : wikipedia - engine : mediawiki + engine : wikipedia shortcut : wp base_url : 'https://{language}.wikipedia.org/' - number_of_results : 1 - name : bing engine : bing @@ -62,6 +61,18 @@ engines: engine : bing_news shortcut : bin + - name : bitbucket + engine : xpath + paging : 
True + search_url : https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath : //article[@class="repo-summary"]/p + categories : it + timeout : 4.0 + disabled : True + shortcut : bb + - name : btdigg engine : btdigg shortcut : bt @@ -83,11 +94,25 @@ engines: - name : ddg definitions engine : duckduckgo_definitions shortcut : ddd + disabled : True - name : digg engine : digg shortcut : dg + - name : erowid + engine : xpath + paging : True + first_page_num : 0 + page_size : 30 + search_url : https://www.erowid.org/search.php?q={query}&s={pageno} + url_xpath : //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath : //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath : //dl[@class="results-list"]/dd[@class="result-details"] + categories : general + shortcut : ew + disabled : True + - name : wikidata engine : wikidata shortcut : wd @@ -111,6 +136,11 @@ engines: shortcut : 1x disabled : True + - name : fdroid + engine : fdroid + shortcut : fd + disabled : True + - name : flickr categories : images shortcut : fl @@ -131,6 +161,18 @@ engines: shortcut : gb disabled: True + - name : gitlab + engine : xpath + paging : True + search_url : https://gitlab.com/search?page={pageno}&search={query} + url_xpath : //li[@class="project-row"]//a[@class="project"]/@href + title_xpath : //li[@class="project-row"]//span[contains(@class, "project-full-name")] + content_xpath : //li[@class="project-row"]//div[@class="description"]/p + categories : it + shortcut : gl + timeout : 5.0 + disabled : True + - name : github engine : github shortcut : gh @@ -177,10 +219,39 @@ engines: shortcut : gps disabled : True + - name : geektimes + engine : xpath + paging : True + search_url : https://geektimes.ru/search/page{pageno}/?q={query} + url_xpath : //div[@class="search_results"]//a[@class="post_title"]/@href + title_xpath : //div[@class="search_results"]//a[@class="post_title"] + content_xpath : //div[@class="search_results"]//div[contains(@class, "content")] + categories : it + timeout : 4.0 + disabled : True + shortcut : gt + + - name : habrahabr + engine : xpath + paging : True + search_url : https://habrahabr.ru/search/page{pageno}/?q={query} + url_xpath : //div[@class="search_results"]//a[@class="post_title"]/@href + title_xpath : //div[@class="search_results"]//a[@class="post_title"] + content_xpath : //div[@class="search_results"]//div[contains(@class, "content")] + categories : it + timeout : 4.0 + disabled : True + shortcut : habr + - name : mixcloud engine : mixcloud shortcut : mc + - name : nyaa + engine : nyaa + shortcut : nt + disabled : True + - name : openstreetmap engine : openstreetmap shortcut : osm @@ -215,6 +286,13 @@ engines: shortcut : qws categories : social media + - name : reddit + engine : reddit + shortcut : re + page_size : 25 + timeout : 10.0 + disabled : True + - name : kickass engine : kickass shortcut : ka @@ -266,6 +344,17 @@ engines: shortcut : sw disabled : True + - name : tokyotoshokan + engine : tokyotoshokan + shortcut : tt + timeout : 6.0 + disabled : True + + - name : torrentz + engine : torrentz + timeout : 5.0 + shortcut : to + - name : twitter engine : twitter shortcut : tw @@ -339,6 +428,13 @@ engines: # number_of_results : 5 # timeout : 3.0 +# Doku engine lets you access to any Doku wiki instance: +# A public one or a privete/corporate one. 
+# - name : ubuntuwiki +# engine : doku +# shortcut : uw +# base_url : 'http://doc.ubuntu-fr.org' + locales: en : English bg : Български (Bulgarian) diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml index fb193e43d..7c7c4eec2 100644 --- a/searx/settings_robot.yml +++ b/searx/settings_robot.yml @@ -4,7 +4,7 @@ general: search: safe_search : 0 - autocomplete : 0 + autocomplete : "" server: port : 11111 diff --git a/searx/static/plugins/css/vim_hotkeys.css b/searx/static/plugins/css/vim_hotkeys.css new file mode 100644 index 000000000..2ccfdc1af --- /dev/null +++ b/searx/static/plugins/css/vim_hotkeys.css @@ -0,0 +1,26 @@ +.vim-hotkeys-help { + position: fixed; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + z-index: 9999999; + overflow-y: auto; + max-height: 80%; + box-shadow: 0 0 1em; +} + +.dflex { + display: -webkit-box; /* OLD - iOS 6-, Safari 3.1-6 */ + display: -moz-box; /* OLD - Firefox 19- (buggy but mostly works) */ + display: -ms-flexbox; /* TWEENER - IE 10 */ + display: -webkit-flex; /* NEW - Chrome */ + display: flex; /* NEW, Spec - Opera 12.1, Firefox 20+ */ +} + +.iflex { + -webkit-box-flex: 1; /* OLD - iOS 6-, Safari 3.1-6 */ + -moz-box-flex: 1; /* OLD - Firefox 19- */ + -webkit-flex: 1; /* Chrome */ + -ms-flex: 1; /* IE 10 */ + flex: 1; /* NEW, Spec - Opera 12.1, Firefox 20+ */ +} diff --git a/searx/static/plugins/js/vim_hotkeys.js b/searx/static/plugins/js/vim_hotkeys.js new file mode 100644 index 000000000..61500d8f5 --- /dev/null +++ b/searx/static/plugins/js/vim_hotkeys.js @@ -0,0 +1,336 @@ +$(document).ready(function() { + highlightResult('top')(); + + $('.result').on('click', function() { + highlightResult($(this))(); + }); + + var vimKeys = { + 27: { + key: 'Escape', + fun: removeFocus, + des: 'remove focus from the focused input', + cat: 'Control' + }, + 73: { + key: 'i', + fun: searchInputFocus, + des: 'focus on the search input', + cat: 'Control' + }, + 66: { + key: 'b', + fun: scrollPage(-window.innerHeight), + des: 'scroll one page up', + cat: 'Navigation' + }, + 70: { + key: 'f', + fun: scrollPage(window.innerHeight), + des: 'scroll one page down', + cat: 'Navigation' + }, + 85: { + key: 'u', + fun: scrollPage(-window.innerHeight / 2), + des: 'scroll half a page up', + cat: 'Navigation' + }, + 68: { + key: 'd', + fun: scrollPage(window.innerHeight / 2), + des: 'scroll half a page down', + cat: 'Navigation' + }, + 71: { + key: 'g', + fun: scrollPageTo(-document.body.scrollHeight, 'top'), + des: 'scroll to the top of the page', + cat: 'Navigation' + }, + 86: { + key: 'v', + fun: scrollPageTo(document.body.scrollHeight, 'bottom'), + des: 'scroll to the bottom of the page', + cat: 'Navigation' + }, + 75: { + key: 'k', + fun: highlightResult('up'), + des: 'select previous search result', + cat: 'Results' + }, + 74: { + key: 'j', + fun: highlightResult('down'), + des: 'select next search result', + cat: 'Results' + }, + 80: { + key: 'p', + fun: pageButtonClick(0), + des: 'go to previous page', + cat: 'Results' + }, + 78: { + key: 'n', + fun: pageButtonClick(1), + des: 'go to next page', + cat: 'Results' + }, + 79: { + key: 'o', + fun: openResult(false), + des: 'open search result', + cat: 'Results' + }, + 84: { + key: 't', + fun: openResult(true), + des: 'open the result in a new tab', + cat: 'Results' + }, + 82: { + key: 'r', + fun: reloadPage, + des: 'reload page from the server', + cat: 'Control' + }, + 72: { + key: 'h', + fun: toggleHelp, + des: 'toggle help window', + cat: 'Other' + } + }; + + $(document).keyup(function(e) { + // 
check for modifiers so we don't break browser's hotkeys + if (vimKeys.hasOwnProperty(e.keyCode) + && !e.ctrlKey + && !e.altKey + && !e.shiftKey + && !e.metaKey) + { + if (e.keyCode === 27) { + if (e.target.tagName.toLowerCase() === 'input') { + vimKeys[e.keyCode].fun(); + } + } else { + if (e.target === document.body) { + vimKeys[e.keyCode].fun(); + } + } + } + }); + + function highlightResult(which) { + return function() { + var current = $('.result[data-vim-selected]'); + if (current.length === 0) { + current = $('.result:first'); + if (current.length === 0) { + return; + } + } + + var next; + + if (typeof which !== 'string') { + next = which; + } else { + switch (which) { + case 'visible': + var top = $(window).scrollTop(); + var bot = top + $(window).height(); + var results = $('.result'); + + for (var i = 0; i < results.length; i++) { + next = $(results[i]); + var etop = next.offset().top; + var ebot = etop + next.height(); + + if ((ebot <= bot) && (etop > top)) { + break; + } + } + break; + case 'down': + next = current.next('.result'); + if (next.length === 0) { + next = $('.result:first'); + } + break; + case 'up': + next = current.prev('.result'); + if (next.length === 0) { + next = $('.result:last'); + } + break; + case 'bottom': + next = $('.result:last'); + break; + case 'top': + default: + next = $('.result:first'); + } + } + + if (next) { + current.removeAttr('data-vim-selected').removeClass('well well-sm'); + next.attr('data-vim-selected', 'true').addClass('well well-sm'); + scrollPageToSelected(); + } + } + } + + function reloadPage() { + document.location.reload(false); + } + + function removeFocus() { + if (document.activeElement) { + document.activeElement.blur(); + } + } + + function pageButtonClick(num) { + return function() { + var buttons = $('div#pagination button[type="submit"]'); + if (buttons.length !== 2) { + console.log('page navigation with this theme is not supported'); + return; + } + if (num >= 0 && num < buttons.length) { + buttons[num].click(); + } else { + console.log('pageButtonClick(): invalid argument'); + } + } + } + + function scrollPageToSelected() { + var sel = $('.result[data-vim-selected]'); + if (sel.length !== 1) { + return; + } + + var wnd = $(window); + + var wtop = wnd.scrollTop(); + var etop = sel.offset().top; + + var offset = 30; + + if (wtop > etop) { + wnd.scrollTop(etop - offset); + } else { + var ebot = etop + sel.height(); + var wbot = wtop + wnd.height(); + + if (wbot < ebot) { + wnd.scrollTop(ebot - wnd.height() + offset); + } + } + } + + function scrollPage(amount) { + return function() { + window.scrollBy(0, amount); + highlightResult('visible')(); + } + } + + function scrollPageTo(position, nav) { + return function() { + window.scrollTo(0, position); + highlightResult(nav)(); + } + } + + function searchInputFocus() { + $('input#q').focus(); + } + + function openResult(newTab) { + return function() { + var link = $('.result[data-vim-selected] .result_header a'); + if (link.length) { + var url = link.attr('href'); + if (newTab) { + window.open(url); + } else { + window.location.href = url; + } + } + }; + } + + function toggleHelp() { + var helpPanel = $('#vim-hotkeys-help'); + if (helpPanel.length) { + helpPanel.toggleClass('hidden'); + return; + } + + var categories = {}; + + for (var k in vimKeys) { + var key = vimKeys[k]; + categories[key.cat] = categories[key.cat] || []; + categories[key.cat].push(key); + } + + var sorted = Object.keys(categories).sort(function(a, b) { + return categories[b].length - categories[a].length; + 
}); + + if (sorted.length === 0) { + return; + } + + var html = '<div id="vim-hotkeys-help" class="well vim-hotkeys-help">'; + html += '<div class="container-fluid">'; + + html += '<div class="row">'; + html += '<div class="col-sm-12">'; + html += '<h3>How to navigate searx with Vim-like hotkeys</h3>'; + html += '</div>'; // col-sm-12 + html += '</div>'; // row + + for (var i = 0; i < sorted.length; i++) { + var cat = categories[sorted[i]]; + + var lastCategory = i === (sorted.length - 1); + var first = i % 2 === 0; + + if (first) { + html += '<div class="row dflex">'; + } + html += '<div class="col-sm-' + (first && lastCategory ? 12 : 6) + ' dflex">'; + + html += '<div class="panel panel-default iflex">'; + html += '<div class="panel-heading">' + cat[0].cat + '</div>'; + html += '<div class="panel-body">'; + html += '<ul class="list-unstyled">'; + + for (var cj in cat) { + html += '<li><kbd>' + cat[cj].key + '</kbd> ' + cat[cj].des + '</li>'; + } + + html += '</ul>'; + html += '</div>'; // panel-body + html += '</div>'; // panel + html += '</div>'; // col-sm-* + + if (!first || lastCategory) { + html += '</div>'; // row + } + } + + html += '</div>'; // container-fluid + html += '</div>'; // vim-hotkeys-help + + $('body').append(html); + } +}); diff --git a/searx/templates/courgette/preferences.html b/searx/templates/courgette/preferences.html index f89915d8d..ba4d0c650 100644 --- a/searx/templates/courgette/preferences.html +++ b/searx/templates/courgette/preferences.html @@ -109,7 +109,7 @@ <td>{{ search_engine.name }} ({{ shortcuts[search_engine.name] }})‎</td> <td>{{ _(categ) }}</td> <td class="engine_checkbox"> - <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in blocked_engines %} checked="checked"{% endif %} /> + <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in disabled_engines %} checked="checked"{% endif %} /> <label class="allow" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Allow') }}</label> <label class="deny" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Block') }}</label> </td> diff --git a/searx/templates/default/preferences.html b/searx/templates/default/preferences.html index 90006c029..a47dba458 100644 --- a/searx/templates/default/preferences.html +++ b/searx/templates/default/preferences.html @@ -97,7 +97,7 @@ <td>{{ search_engine.name }} ({{ shortcuts[search_engine.name] }})‎</td> <td>{{ _(categ) }}</td> <td class="engine_checkbox"> - <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in blocked_engines %} checked="checked"{% endif %} /> + <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in disabled_engines %} checked="checked"{% endif %} /> <label class="allow" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Allow') }}</label> <label class="deny" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Block') }}</label> </td> diff 
--git a/searx/templates/oscar/base.html b/searx/templates/oscar/base.html index 4813fffc2..f63025ecc 100644 --- a/searx/templates/oscar/base.html +++ b/searx/templates/oscar/base.html @@ -86,5 +86,8 @@ {% for script in scripts %} <script src="{{ url_for('static', filename=script) }}"></script> {% endfor %} + <script type="text/javascript"> + $(function() { $('a[data-toggle="modal"]').attr('href', '#'); }); + </script> </body> </html> diff --git a/searx/templates/oscar/infobox.html b/searx/templates/oscar/infobox.html index d87d98453..c72cfb638 100644 --- a/searx/templates/oscar/infobox.html +++ b/searx/templates/oscar/infobox.html @@ -1,8 +1,9 @@ <div class="panel panel-default infobox"> <div class="panel-heading"> - <h4 class="panel-title infobox_part">{{ infobox.infobox }}</h4> + <bdi><h4 class="panel-title infobox_part">{{ infobox.infobox }}</h4></bdi> </div> <div class="panel-body"> + <bdi> {% if infobox.img_src %}<img class="img-responsive center-block infobox_part" src="{{ image_proxify(infobox.img_src) }}" alt="{{ infobox.infobox }}" />{% endif %} {% if infobox.content %}<p class="infobox_part">{{ infobox.content }}</p>{% endif %} @@ -28,5 +29,6 @@ {% endfor %} </div> {% endif %} + </bdi> </div> </div> diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html index c677a0c66..a2c493a02 100644 --- a/searx/templates/oscar/preferences.html +++ b/searx/templates/oscar/preferences.html @@ -157,7 +157,7 @@ {% if not search_engine.private %} <tr> {% if not rtl %} - <td>{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in blocked_engines) }}</td> + <td>{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in disabled_engines) }}</td> <th>{{ search_engine.name }}</th> <td>{{ shortcuts[search_engine.name] }}</td> <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> @@ -169,7 +169,7 @@ <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> <td>{{ shortcuts[search_engine.name] }}</td> <th>{{ search_engine.name }}</th> - <td>{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in blocked_engines) }}</td> + <td>{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in disabled_engines) }}</td> {% endif %} </tr> {% endif %} diff --git a/searx/templates/oscar/result_templates/default.html b/searx/templates/oscar/result_templates/default.html index fc61b8ce0..f283693c3 100644 --- a/searx/templates/oscar/result_templates/default.html +++ b/searx/templates/oscar/result_templates/default.html @@ -13,7 +13,16 @@ </div>
{% endif %}
+{% if result.img_src %}
+<div class="container-fluid">
+ <div class="row">
+<img src="{{ image_proxify(result.img_src) }}" alt="{{ result.title|striptags }}" title="{{ result.title|striptags }}" style="width: auto; max-height: 60px; min-height: 60px;" class="col-xs-2 col-sm-4 col-md-4 result-content">
+{% if result.content %}<p class="result-content col-xs-8 col-sm-8 col-md-8">{{ result.content|safe }}</p>{% endif %}
+ </div>
+</div>
+{% else %}
{% if result.content %}<p class="result-content">{{ result.content|safe }}</p>{% endif %}
+{% endif %}
{% if rtl %}
{{ result_footer_rtl(result) }}
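
[editor's note] The template hunk above branches on result.img_src: a result that carries an image is now rendered as a thumbnail beside its snippet, while plain results keep the old single-paragraph layout. A minimal sketch of the two result-dict shapes the branch distinguishes; the field names follow the engines in this diff, but the values and the dict names are invented for illustration:

    # hypothetical result dicts, showing which branch of default.html fires
    text_result = {
        'url': 'https://example.org/article',
        'title': 'Example article',
        'content': 'Plain snippet, rendered by the original result-content branch.',
    }

    image_result = {
        'url': 'https://example.org/app',
        'title': 'Example app',
        'content': 'Snippet shown beside the thumbnail.',
        # the presence of img_src selects the new two-column layout;
        # the template passes it through image_proxify() before rendering
        'img_src': 'https://example.org/icon.png',
    }
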
diff --git a/searx/templates/pix-art/preferences.html b/searx/templates/pix-art/preferences.html index f59497ec8..a4a6cd268 100644 --- a/searx/templates/pix-art/preferences.html +++ b/searx/templates/pix-art/preferences.html @@ -60,7 +60,7 @@ <tr> <td>{{ search_engine.name }} ({{ shortcuts[search_engine.name] }})‎</td> <td class="engine_checkbox"> - <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in blocked_engines %} checked="checked"{% endif %} /> + <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in disabled_engines %} checked="checked"{% endif %} /> <label class="allow" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Allow') }}</label> <label class="deny" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Block') }}</label> </td> diff --git a/searx/utils.py b/searx/utils.py index 506228465..b297582ef 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -230,26 +230,3 @@ def list_get(a_list, index, default=None): return a_list[index] else: return default - - -def get_blocked_engines(engines, cookies): - if 'blocked_engines' not in cookies: - return [(engine_name, category) for engine_name in engines - for category in engines[engine_name].categories if engines[engine_name].disabled] - - blocked_engine_strings = cookies.get('blocked_engines', '').split(',') - blocked_engines = [] - - if not blocked_engine_strings: - return blocked_engines - - for engine_string in blocked_engine_strings: - if engine_string.find('__') > -1: - engine, category = engine_string.split('__', 1) - if engine in engines and category in engines[engine].categories: - blocked_engines.append((engine, category)) - elif engine_string in engines: - for category in engines[engine_string].categories: - blocked_engines.append((engine_string, category)) - - return blocked_engines diff --git a/searx/webapp.py b/searx/webapp.py index d10f04e21..ed8ff1bd9 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -56,7 +56,7 @@ from searx.engines import ( from searx.utils import ( UnicodeWriter, highlight_content, html_to_text, get_themes, get_static_files, get_result_templates, gen_useragent, dict_subset, - prettify_url, get_blocked_engines + prettify_url ) from searx.version import VERSION_STRING from searx.languages import language_codes @@ -64,6 +64,7 @@ from searx.search import Search from searx.query import Query from searx.autocomplete import searx_bang, backends as autocomplete_backends from searx.plugins import plugins +from searx.preferences import Preferences # check if the pyopenssl, ndg-httpsclient, pyasn1 packages are installed. 
# They are needed for SSL connection without trouble, see #298 @@ -73,7 +74,7 @@ try: import pyasn1 # NOQA except ImportError: logger.critical("The pyopenssl, ndg-httpsclient, pyasn1 packages have to be installed.\n" - "Some HTTPS connections will failed") + "Some HTTPS connections will fail") static_path, templates_path, themes =\ @@ -109,8 +110,7 @@ for indice, theme in enumerate(themes): for (dirpath, dirnames, filenames) in os.walk(theme_img_path): global_favicons[indice].extend(filenames) -cookie_max_age = 60 * 60 * 24 * 365 * 5 # 5 years - +# used when translating category names _category_names = (gettext('files'), gettext('general'), gettext('music'), @@ -129,11 +129,8 @@ outgoing_proxies = settings['outgoing'].get('proxies', None) def get_locale(): locale = request.accept_languages.best_match(settings['locales'].keys()) - if settings['ui'].get('default_locale'): - locale = settings['ui']['default_locale'] - - if request.cookies.get('locale', '') in settings['locales']: - locale = request.cookies.get('locale', '') + if request.preferences.get_value('locale') != '': + locale = request.preferences.get_value('locale') if 'locale' in request.args\ and request.args['locale'] in settings['locales']: @@ -222,9 +219,7 @@ def get_current_theme_name(override=None): if override and override in themes: return override - theme_name = request.args.get('theme', - request.cookies.get('theme', - default_theme)) + theme_name = request.args.get('theme', request.preferences.get_value('theme')) if theme_name not in themes: theme_name = default_theme return theme_name @@ -251,7 +246,7 @@ def image_proxify(url): if url.startswith('//'): url = 'https:' + url - if not settings['server'].get('image_proxy') and not request.cookies.get('image_proxy'): + if not request.preferences.get_value('image_proxy'): return url hash_string = url + settings['server']['secret_key'] @@ -262,23 +257,18 @@ def image_proxify(url): def render(template_name, override_theme=None, **kwargs): - blocked_engines = get_blocked_engines(engines, request.cookies) - - autocomplete = request.cookies.get('autocomplete', settings['search']['autocomplete']) - - if autocomplete not in autocomplete_backends: - autocomplete = None + disabled_engines = request.preferences.engines.get_disabled() - nonblocked_categories = set(category for engine_name in engines - for category in engines[engine_name].categories - if (engine_name, category) not in blocked_engines) + enabled_categories = set(category for engine_name in engines + for category in engines[engine_name].categories + if (engine_name, category) not in disabled_engines) if 'categories' not in kwargs: kwargs['categories'] = ['general'] kwargs['categories'].extend(x for x in sorted(categories.keys()) if x != 'general' - and x in nonblocked_categories) + and x in enabled_categories) if 'all_categories' not in kwargs: kwargs['all_categories'] = ['general'] @@ -295,25 +285,24 @@ def render(template_name, override_theme=None, **kwargs): kwargs['selected_categories'].append(c) if not kwargs['selected_categories']: - cookie_categories = request.cookies.get('categories', '').split(',') + cookie_categories = request.preferences.get_value('categories') for ccateg in cookie_categories: - if ccateg in categories: - kwargs['selected_categories'].append(ccateg) + kwargs['selected_categories'].append(ccateg) if not kwargs['selected_categories']: kwargs['selected_categories'] = ['general'] if 'autocomplete' not in kwargs: - kwargs['autocomplete'] = autocomplete + kwargs['autocomplete'] = 
request.preferences.get_value('autocomplete') if get_locale() in rtl_locales and 'rtl' not in kwargs: kwargs['rtl'] = True kwargs['searx_version'] = VERSION_STRING - kwargs['method'] = request.cookies.get('method', 'POST') + kwargs['method'] = request.preferences.get_value('method') - kwargs['safesearch'] = request.cookies.get('safesearch', str(settings['search']['safe_search'])) + kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) # override url_for function in templates kwargs['url_for'] = url_for_theme @@ -347,14 +336,18 @@ def render(template_name, override_theme=None, **kwargs): @app.before_request def pre_request(): # merge GET, POST vars + preferences = Preferences(themes, categories.keys(), engines, plugins) + preferences.parse_cookies(request.cookies) + request.preferences = preferences + request.form = dict(request.form.items()) for k, v in request.args.items(): if k not in request.form: request.form[k] = v request.user_plugins = [] - allowed_plugins = request.cookies.get('allowed_plugins', '').split(',') - disabled_plugins = request.cookies.get('disabled_plugins', '').split(',') + allowed_plugins = preferences.plugins.get_enabled() + disabled_plugins = preferences.plugins.get_disabled() for plugin in plugins: if ((plugin.default_on and plugin.id not in disabled_plugins) or plugin.id in allowed_plugins): @@ -486,18 +479,18 @@ def autocompleter(): request_data = request.args # set blocked engines - blocked_engines = get_blocked_engines(engines, request.cookies) + disabled_engines = request.preferences.engines.get_disabled() # parse query - query = Query(request_data.get('q', '').encode('utf-8'), blocked_engines) + query = Query(request_data.get('q', '').encode('utf-8'), disabled_engines) query.parse_query() # check if search query is set if not query.getSearchQuery(): return '', 400 - # get autocompleter - completer = autocomplete_backends.get(request.cookies.get('autocomplete', settings['search']['autocomplete'])) + # run autocompleter + completer = autocomplete_backends.get(request.preferences.get_value('autocomplete')) # parse searx specific autocompleter results like !bang raw_results = searx_bang(query) @@ -505,7 +498,7 @@ def autocompleter(): # normal autocompletion results only appear if max 3 inner results returned if len(raw_results) <= 3 and completer: # get language from cookie - language = request.cookies.get('language') + language = request.preferences.get_value('language') if not language or language == 'all': language = 'en' else: @@ -532,117 +525,23 @@ def autocompleter(): @app.route('/preferences', methods=['GET', 'POST']) def preferences(): - """Render preferences page. 
- - Settings that are going to be saved as cookies.""" - lang = None - image_proxy = request.cookies.get('image_proxy', settings['server'].get('image_proxy')) - - if request.cookies.get('language')\ - and request.cookies['language'] in (x[0] for x in language_codes): - lang = request.cookies['language'] - - blocked_engines = [] - - resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index')))) - - if request.method == 'GET': - blocked_engines = get_blocked_engines(engines, request.cookies) - else: # on save - selected_categories = [] - post_disabled_plugins = [] - locale = None - autocomplete = '' - method = 'POST' - safesearch = settings['search']['safe_search'] - for pd_name, pd in request.form.items(): - if pd_name.startswith('category_'): - category = pd_name[9:] - if category not in categories: - continue - selected_categories.append(category) - elif pd_name == 'locale' and pd in settings['locales']: - locale = pd - elif pd_name == 'image_proxy': - image_proxy = pd - elif pd_name == 'autocomplete': - autocomplete = pd - elif pd_name == 'language' and (pd == 'all' or - pd in (x[0] for - x in language_codes)): - lang = pd - elif pd_name == 'method': - method = pd - elif pd_name == 'safesearch': - safesearch = pd - elif pd_name.startswith('engine_'): - if pd_name.find('__') > -1: - # TODO fix underscore vs space - engine_name, category = [x.replace('_', ' ') for x in - pd_name.replace('engine_', '', 1).split('__', 1)] - if engine_name in engines and category in engines[engine_name].categories: - blocked_engines.append((engine_name, category)) - elif pd_name == 'theme': - theme = pd if pd in themes else default_theme - elif pd_name.startswith('plugin_'): - plugin_id = pd_name.replace('plugin_', '', 1) - if not any(plugin.id == plugin_id for plugin in plugins): - continue - post_disabled_plugins.append(plugin_id) - else: - resp.set_cookie(pd_name, pd, max_age=cookie_max_age) - - disabled_plugins = [] - allowed_plugins = [] - for plugin in plugins: - if plugin.default_on: - if plugin.id in post_disabled_plugins: - disabled_plugins.append(plugin.id) - elif plugin.id not in post_disabled_plugins: - allowed_plugins.append(plugin.id) + """Render preferences page && save user preferences""" - resp.set_cookie('disabled_plugins', ','.join(disabled_plugins), max_age=cookie_max_age) - - resp.set_cookie('allowed_plugins', ','.join(allowed_plugins), max_age=cookie_max_age) - - resp.set_cookie( - 'blocked_engines', ','.join('__'.join(e) for e in blocked_engines), - max_age=cookie_max_age - ) - - if locale: - resp.set_cookie( - 'locale', locale, - max_age=cookie_max_age - ) - - if lang: - resp.set_cookie( - 'language', lang, - max_age=cookie_max_age - ) - - if selected_categories: - # cookie max age: 4 weeks - resp.set_cookie( - 'categories', ','.join(selected_categories), - max_age=cookie_max_age - ) - - resp.set_cookie( - 'autocomplete', autocomplete, - max_age=cookie_max_age - ) - - resp.set_cookie('method', method, max_age=cookie_max_age) - - resp.set_cookie('safesearch', str(safesearch), max_age=cookie_max_age) - - resp.set_cookie('image_proxy', image_proxy, max_age=cookie_max_age) - - resp.set_cookie('theme', theme, max_age=cookie_max_age) - - return resp + # save preferences + if request.method == 'POST': + resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index')))) + try: + request.preferences.parse_form(request.form) + except ValidationException: + # TODO use flash feature of flask + return resp + return request.preferences.save(resp) + + 
# render preferences + image_proxy = request.preferences.get_value('image_proxy') + lang = request.preferences.get_value('language') + disabled_engines = request.preferences.engines.get_disabled() + allowed_plugins = request.preferences.plugins.get_enabled() # stats for preferences page stats = {} @@ -664,17 +563,17 @@ def preferences(): return render('preferences.html', locales=settings['locales'], current_locale=get_locale(), - current_language=lang or 'all', + current_language=lang, image_proxy=image_proxy, language_codes=language_codes, engines_by_category=categories, stats=stats, - blocked_engines=blocked_engines, + disabled_engines=disabled_engines, autocomplete_backends=autocomplete_backends, shortcuts={y: x for x, y in engine_shortcuts.items()}, themes=themes, plugins=plugins, - allowed_plugins=[plugin.id for plugin in request.user_plugins], + allowed_plugins=allowed_plugins, theme=get_current_theme_name()) @@ -750,7 +649,7 @@ Disallow: /preferences def opensearch(): method = 'post' - if request.cookies.get('method', 'POST') == 'GET': + if request.preferences.get_value('method') == 'GET': method = 'get' # chrome/chromium only supports HTTP GET.... diff --git a/tests/robot/test_basic.robot b/tests/robot/test_basic.robot index 1b8e78fff..4a20d0ba2 100644 --- a/tests/robot/test_basic.robot +++ b/tests/robot/test_basic.robot @@ -42,3 +42,111 @@ Change language Location Should Be http://localhost:11111/ Page Should Contain rólunk Page Should Contain beállítások + +Change method + Page Should Contain about + Page Should Contain preferences + Go To http://localhost:11111/preferences + Select From List method GET + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + List Selection Should Be method GET + Select From List method POST + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + List Selection Should Be method POST + +Change theme + Page Should Contain about + Page Should Contain preferences + Go To http://localhost:11111/preferences + List Selection Should Be theme default + Select From List theme oscar + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + List Selection Should Be theme oscar + +Change safesearch + Page Should Contain about + Page Should Contain preferences + Go To http://localhost:11111/preferences + List Selection Should Be safesearch None + Select From List safesearch Strict + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + List Selection Should Be safesearch Strict + +Change image proxy + Page Should Contain about + Page Should Contain preferences + Go To http://localhost:11111/preferences + List Selection Should Be image_proxy Disabled + Select From List image_proxy Enabled + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + List Selection Should Be image_proxy Enabled + +Change search language + Page Should Contain about + Page Should Contain preferences + Go To http://localhost:11111/preferences + List Selection Should Be language Automatic + Select From List language Turkish (Turkey) - tr_TR + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + List Selection Should Be language Turkish (Turkey) - tr_TR + +Change autocomplete + Page Should Contain about + Page Should 
Contain preferences + Go To http://localhost:11111/preferences + List Selection Should Be autocomplete - + Select From List autocomplete google + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + List Selection Should Be autocomplete google + +Change allowed/disabled engines + Page Should Contain about + Page Should Contain preferences + Go To http://localhost:11111/preferences + Page Should Contain Engine name + Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block + Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] Block + Click Element xpath=//label[@class="deny"][@for='engine_general_general_dummy'] + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Page Should Contain about + Page Should Contain preferences + Go To http://localhost:11111/preferences + Page Should Contain Engine name + Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block + Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] \ + +Block a plugin + Page Should Contain about + Page Should Contain preferences + Go To http://localhost:11111/preferences + List Selection Should Be theme default + Select From List theme oscar + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + List Selection Should Be theme oscar + Page Should Contain Plugins + Click Link Plugins + Checkbox Should Not Be Selected id=plugin_HTTPS_rewrite + Click Element xpath=//label[@for='plugin_HTTPS_rewrite'] + Submit Form id=search_form + Location Should Be http://localhost:11111/ + Go To http://localhost:11111/preferences + Page Should Contain Plugins + Click Link Plugins + Checkbox Should Be Selected id=plugin_HTTPS_rewrite diff --git a/tests/unit/engines/test_currency_convert.py b/tests/unit/engines/test_currency_convert.py index 84ec3b742..b7720569f 100644 --- a/tests/unit/engines/test_currency_convert.py +++ b/tests/unit/engines/test_currency_convert.py @@ -14,23 +14,19 @@ class TestCurrencyConvertEngine(SearxTestCase): params = currency_convert.request(query, dicto) self.assertNotIn('url', params) - query = '1.1.1 EUR in USD' - params = currency_convert.request(query, dicto) - self.assertNotIn('url', params) - - query = '10 eur in usd' + query = 'convert 10 Pound Sterlings to United States Dollars' params = currency_convert.request(query, dicto) self.assertIn('url', params) self.assertIn('finance.yahoo.com', params['url']) - self.assertIn('EUR', params['url']) + self.assertIn('GBP', params['url']) self.assertIn('USD', params['url']) def test_response(self): dicto = defaultdict(dict) dicto['ammount'] = float(10) - dicto['from'] = "EUR" + dicto['from'] = "GBP" dicto['to'] = "USD" - dicto['from_name'] = "euro" + dicto['from_name'] = "pound sterling" dicto['to_name'] = "United States dollar" response = mock.Mock(text='a,b,c,d', search_params=dicto) self.assertEqual(currency_convert.response(response), []) @@ -40,7 +36,8 @@ class TestCurrencyConvertEngine(SearxTestCase): results = currency_convert.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) - self.assertEqual(results[0]['answer'], '10.0 EUR = 5.0 USD, 1 EUR (euro) = 0.5 USD (United States dollar)') + self.assertEqual(results[0]['answer'], '10.0 GBP = 5.0 USD, 1 GBP (pound sterling)' + + ' = 0.5 USD (United States dollar)') now_date = 
datetime.now().strftime('%Y%m%d') self.assertEqual(results[0]['url'], 'https://finance.yahoo.com/currency/converter-results/' + - now_date + '/10.0-eur-to-usd.html') + now_date + '/10.0-gbp-to-usd.html') diff --git a/tests/unit/engines/test_doku.py b/tests/unit/engines/test_doku.py new file mode 100644 index 000000000..22ddb7a7f --- /dev/null +++ b/tests/unit/engines/test_doku.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import doku +from searx.testing import SearxTestCase + + +class TestDokuEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + params = doku.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + + def test_response(self): + self.assertRaises(AttributeError, doku.response, None) + self.assertRaises(AttributeError, doku.response, []) + self.assertRaises(AttributeError, doku.response, '') + self.assertRaises(AttributeError, doku.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(doku.response(response), []) + + html = u""" + <div class="search_quickresult"> + <h3>Pages trouvées :</h3> + <ul class="search_quickhits"> + <li> <a href="/xfconf-query" class="wikilink1" title="xfconf-query">xfconf-query</a></li> + </ul> + <div class="clearer"></div> + </div> + """ + response = mock.Mock(text=html) + results = doku.response(response) + expected = [{'content': '', 'title': 'xfconf-query', 'url': 'http://localhost:8090/xfconf-query'}] + self.assertEqual(doku.response(response), expected) + + html = u""" + <dl class="search_results"> + <dt><a href="/xvnc?s[]=query" class="wikilink1" title="xvnc">xvnc</a>: 40 Occurrences trouvées</dt> + <dd>er = /usr/bin/Xvnc + server_args = -inetd -<strong class="search_hit">query</strong> localhost -geometry 640x480 ... er = /usr/bin/Xvnc + server_args = -inetd -<strong class="search_hit">query</strong> localhost -geometry 800x600 ... er = /usr/bin/Xvnc + server_args = -inetd -<strong class="search_hit">query</strong> localhost -geometry 1024x768 ... er = /usr/bin/Xvnc + server_args = -inetd -<strong class="search_hit">query</strong> localhost -geometry 1280x1024 -depth 8 -Sec</dd> + <dt><a href="/postfix_mysql_tls_sasl_1404?s[]=query" + class="wikilink1" + title="postfix_mysql_tls_sasl_1404">postfix_mysql_tls_sasl_1404</a>: 14 Occurrences trouvées</dt> + <dd>tdepasse + hosts = 127.0.0.1 + dbname = postfix + <strong class="search_hit">query</strong> = SELECT goto FROM alias WHERE address='%s' AND a... tdepasse + hosts = 127.0.0.1 + dbname = postfix + <strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s' + #optional <strong class="search_hit">query</strong> to use when relaying for backup MX + #<strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s' and backupmx =</dd> + <dt><a href="/bind9?s[]=query" class="wikilink1" title="bind9">bind9</a>: 12 Occurrences trouvées</dt> + <dd> printcmd +;; Got answer: +;; ->>HEADER<<- opcode: <strong class="search_hit">QUERY</strong>, status: NOERROR, id: 13427 +;; flags: qr aa rd ra; <strong class="search_hit">QUERY</strong>: 1, ANSWER: 1, AUTHORITY: 1, ADDITIONAL: 1 + +[...] + +;; <strong class="search_hit">Query</strong> time: 1 msec +;; SERVER: 127.0.0.1#53(127.0.0.1) +;... 
par la requête (<strong class="search_hit">Query</strong> time) , entre la première et la deuxième requête.</dd> + </dl> + """ + response = mock.Mock(text=html) + results = doku.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 3) + self.assertEqual(results[0]['title'], 'xvnc') +# FIXME self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű') +# FIXME self.assertEqual(results[0]['content'], 'This should be the content.') diff --git a/tests/unit/engines/test_duckduckgo_definitions.py b/tests/unit/engines/test_duckduckgo_definitions.py index 71c84235c..39da64175 100644 --- a/tests/unit/engines/test_duckduckgo_definitions.py +++ b/tests/unit/engines/test_duckduckgo_definitions.py @@ -123,7 +123,7 @@ class TestDDGDefinitionsEngine(SearxTestCase): self.assertEqual(results[1]['url'], 'result first url') self.assertEqual(results[2]['suggestion'], 'text') self.assertEqual(results[3]['infobox'], 'heading') - self.assertEqual(results[3]['id'], 'http://definition.url') + self.assertEqual(results[3]['id'], 'https://definition.url') self.assertEqual(results[3]['entity'], 'Entity') self.assertIn('abstract', results[3]['content']) self.assertIn('this is the definition', results[3]['content']) @@ -240,7 +240,7 @@ class TestDDGDefinitionsEngine(SearxTestCase): self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['infobox'], 'heading') - self.assertEqual(results[0]['id'], 'http://definition.url') + self.assertEqual(results[0]['id'], 'https://definition.url') self.assertEqual(results[0]['entity'], 'Entity') self.assertIn('abstract', results[0]['content']) self.assertIn('this is the definition', results[0]['content']) diff --git a/tests/unit/engines/test_fdroid.py b/tests/unit/engines/test_fdroid.py new file mode 100644 index 000000000..d75f4f0b4 --- /dev/null +++ b/tests/unit/engines/test_fdroid.py @@ -0,0 +1,49 @@ +import mock +from collections import defaultdict +from searx.engines import fdroid +from searx.testing import SearxTestCase + + +class TestFdroidEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + dic['pageno'] = 1 + params = fdroid.request(query, dic) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('f-droid.org' in params['url']) + + def test_response(self): + resp = mock.Mock(text='<html></html>') + self.assertEqual(fdroid.response(resp), []) + + html = """ + <a href="https://google.com/qwerty"> + <div id="appheader"> + <div style="float:left;padding-right:10px;"> + <img src="http://example.com/image.png" + style="width:48px;border:none;"> + </div> + <div style="float:right;"> + <p>Details...</p> + </div> + <p style="color:#000000;"> + <span style="font-size:20px;">Sample title</span> + <br> + Sample content + </p> + </div> + </a> + """ + + resp = mock.Mock(text=html) + results = fdroid.response(resp) + + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['url'], 'https://google.com/qwerty') + self.assertEqual(results[0]['title'], 'Sample title') + self.assertEqual(results[0]['content'], 'Sample content') + self.assertEqual(results[0]['img_src'], 'http://example.com/image.png') diff --git a/tests/unit/engines/test_google_images.py b/tests/unit/engines/test_google_images.py index 876d0af1e..5f184e00c 100644 --- a/tests/unit/engines/test_google_images.py +++ b/tests/unit/engines/test_google_images.py @@ -41,7 +41,7 @@ class 
TestGoogleImagesEngine(SearxTestCase): </div> </a> <div class="rg_meta"> - {"id":"bQWQ9wz9loJmjM:","isu":"clker.com","ity":"png","md":"/search?tbs\u003dsbi:AMhZZit7u1mHyop9pQisu-5idR-8W_1Itvwc3afChmsjQYPx_1yYMzBvUZgtkcGoojqekKZ-6n_1rjX9ySH0OWA_1eO5OijFY6BBDw_1GApr6xxb1bXJcBcj-DiguMoXWW7cZSG7MRQbwnI5SoDZNXcv_1xGszy886I7NVb_1oRKSliTHtzqbXAxhvYreM","msu":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAQSEgltBZD3DP2WgiG-U42R4G0RFw","oh":598,"os":"13KB","ow":504,"pt":"South Arrow Clip Art at Clker.com - vector clip art online ...","rid":"vlONkeBtERfDuM","s":"Download this image as:","sc":1,"si":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAESEgltBZD3DP2WgiG-U42R4G0RFw","th":245,"tu":"https://thumbnail.url/","tw":206} + {"id":"bQWQ9wz9loJmjM:","isu":"clker.com","ity":"png","md":"/search?tbs\u003dsbi:AMhZZit7u1mHyop9pQisu-5idR-8W_1Itvwc3afChmsjQYPx_1yYMzBvUZgtkcGoojqekKZ-6n_1rjX9ySH0OWA_1eO5OijFY6BBDw_1GApr6xxb1bXJcBcj-DiguMoXWW7cZSG7MRQbwnI5SoDZNXcv_1xGszy886I7NVb_1oRKSliTHtzqbXAxhvYreM","msu":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAQSEgltBZD3DP2WgiG-U42R4G0RFw","oh":598,"os":"13KB","ow":504,"pt":"South Arrow Clip Art at Clker.com - vector clip art online ...","rid":"vlONkeBtERfDuM","s":"Download this image as:","sc":1,"si":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAESEgltBZD3DP2WgiG-U42R4G0RFw","th":245,"tu":"https://thumbnail.url/","tw":206,"ru":"a","ou":"b"} </div> </div><!--n--><!--m--> </div> @@ -52,7 +52,7 @@ class TestGoogleImagesEngine(SearxTestCase): self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['title'], u'South Arrow Clip Art at Clker.com - vector clip art online ...') - self.assertEqual(results[0]['url'], 'http://www.clker.com/clipart-south-arrow.html') + self.assertEqual(results[0]['url'], 'a') self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url/') - self.assertEqual(results[0]['img_src'], 'http://www.clker.com/cliparts/H/X/l/b/0/0/south-arrow-hi.png') + self.assertEqual(results[0]['img_src'], 'b') self.assertEqual(results[0]['content'], 'Download this image as:') diff --git a/tests/unit/engines/test_nyaa.py b/tests/unit/engines/test_nyaa.py new file mode 100644 index 000000000..db412e1cc --- /dev/null +++ b/tests/unit/engines/test_nyaa.py @@ -0,0 +1,66 @@ +from collections import defaultdict +import mock +from searx.engines import nyaa +from searx.testing import SearxTestCase + + +class TestNyaaEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + dic['pageno'] = 1 + params = nyaa.request(query, dic) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('nyaa.se' in params['url']) + + def test_response(self): + resp = mock.Mock(text='<html></html>') + self.assertEqual(nyaa.response(resp), []) + + html = """ + <table class="tlist"> + <tbody> + <tr class="trusted tlistrow"> + <td class="tlisticon"> + <a href="//www.nyaa.se" title="English-translated Anime"> + <img src="//files.nyaa.se" alt="English-translated Anime"> + </a> + </td> + <td class="tlistname"> + <a href="//www.nyaa.se/?page3"> + Sample torrent title + </a> + </td> + <td class="tlistdownload"> + <a href="//www.nyaa.se/?page_dl" title="Download"> + <img src="//files.nyaa.se/www-dl.png" alt="DL"> + </a> + </td> + <td class="tlistsize">10 MiB</td> + <td 
class="tlistsn">1</td> + <td class="tlistln">3</td> + <td class="tlistdn">666</td> + <td class="tlistmn">0</td> + </tr> + </tbody> + </table> + """ + + resp = mock.Mock(text=html) + results = nyaa.response(resp) + + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + + r = results[0] + self.assertTrue(r['url'].find('www.nyaa.se/?page3') >= 0) + self.assertTrue(r['torrentfile'].find('www.nyaa.se/?page_dl') >= 0) + self.assertTrue(r['content'].find('English-translated Anime') >= 0) + self.assertTrue(r['content'].find('Downloaded 666 times.') >= 0) + + self.assertEqual(r['title'], 'Sample torrent title') + self.assertEqual(r['seed'], 1) + self.assertEqual(r['leech'], 3) + self.assertEqual(r['filesize'], 10 * 1024 * 1024) diff --git a/tests/unit/engines/test_reddit.py b/tests/unit/engines/test_reddit.py new file mode 100644 index 000000000..9c94f4e2b --- /dev/null +++ b/tests/unit/engines/test_reddit.py @@ -0,0 +1,71 @@ +from collections import defaultdict +import mock +from searx.engines import reddit +from searx.testing import SearxTestCase +from datetime import datetime + + +class TestRedditEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + params = reddit.request(query, dic) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('reddit.com' in params['url']) + + def test_response(self): + resp = mock.Mock(text='{}') + self.assertEqual(reddit.response(resp), []) + + json = """ + { + "kind": "Listing", + "data": { + "children": [{ + "data": { + "url": "http://google2.com/", + "permalink": "http://google.com/", + "title": "Title number one", + "selftext": "Sample", + "created_utc": 1401219957.0, + "thumbnail": "http://image.com/picture.jpg" + } + }, { + "data": { + "url": "https://reddit2.com/", + "permalink": "https://reddit.com/", + "title": "Title number two", + "selftext": "Dominus vobiscum", + "created_utc": 1438792533.0, + "thumbnail": "self" + } + }] + } + } + """ + + resp = mock.Mock(text=json) + results = reddit.response(resp) + + self.assertEqual(len(results), 2) + self.assertEqual(type(results), list) + + # testing first result (picture) + r = results[0] + self.assertEqual(r['url'], 'http://google.com/') + self.assertEqual(r['title'], 'Title number one') + self.assertEqual(r['template'], 'images.html') + self.assertEqual(r['img_src'], 'http://google2.com/') + self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg') + + # testing second result (self-post) + r = results[1] + self.assertEqual(r['url'], 'https://reddit.com/') + self.assertEqual(r['title'], 'Title number two') + self.assertEqual(r['content'], 'Dominus vobiscum') + created = datetime.fromtimestamp(1438792533.0) + self.assertEqual(r['publishedDate'], created) + self.assertTrue('thumbnail_src' not in r) + self.assertTrue('img_src' not in r) diff --git a/tests/unit/engines/test_tokyotoshokan.py b/tests/unit/engines/test_tokyotoshokan.py new file mode 100644 index 000000000..efe7dbfc2 --- /dev/null +++ b/tests/unit/engines/test_tokyotoshokan.py @@ -0,0 +1,110 @@ +import mock +from collections import defaultdict +from searx.engines import tokyotoshokan +from searx.testing import SearxTestCase +from datetime import datetime + + +class TestTokyotoshokanEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + dic['pageno'] = 1 + params = tokyotoshokan.request(query, dic) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + 
self.assertTrue('tokyotosho.info' in params['url']) + + def test_response(self): + resp = mock.Mock(text='<html></html>') + self.assertEqual(tokyotoshokan.response(resp), []) + + html = """ + <table class="listing"> + <tbody> + <tr class="shade category_0"> + <td rowspan="2"> + <a href="/?cat=7"><span class="sprite_cat-raw"></span></a> + </td> + <td class="desc-top"> + <a href="magnet:?xt=urn:btih:4c19eb46b5113685fbd2288ed2531b0b"> + <span class="sprite_magnet"></span> + </a> + <a rel="nofollow" type="application/x-bittorrent" href="http://www.nyaa.se/f"> + Koyomimonogatari + </a> + </td> + <td class="web"><a rel="nofollow" href="details.php?id=975700">Details</a></td> + </tr> + <tr class="shade category_0"> + <td class="desc-bot"> + Authorized: <span class="auth_ok">Yes</span> + Submitter: <a href="?username=Ohys">Ohys</a> | + Size: 10.5MB | + Date: 2016-03-26 16:41 UTC | + Comment: sample comment + </td> + <td style="color: #BBB; font-family: monospace" class="stats" align="right"> + S: <span style="color: red">53</span> + L: <span style="color: red">18</span> + C: <span style="color: red">0</span> + ID: 975700 + </td> + </tr> + + <tr class="category_0"> + <td rowspan="2"> + <a href="/?cat=7"><span class="sprite_cat-raw"></span></a> + </td> + <td class="desc-top"> + <a rel="nofollow" type="application/x-bittorrent" href="http://google.com/q"> + Owarimonogatari + </a> + </td> + <td class="web"><a rel="nofollow" href="details.php?id=975700">Details</a></td> + </tr> + <tr class="category_0"> + <td class="desc-bot"> + Submitter: <a href="?username=Ohys">Ohys</a> | + Size: 932.84EB | + Date: QWERTY-03-26 16:41 UTC + </td> + <td style="color: #BBB; font-family: monospace" class="stats" align="right"> + S: <span style="color: red">0</span> + </td> + </tr> + </tbody> + </table> + """ + + resp = mock.Mock(text=html) + results = tokyotoshokan.response(resp) + + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + + # testing the first result, which has correct format + # and should have all information fields filled + r = results[0] + self.assertEqual(r['url'], 'http://www.nyaa.se/f') + self.assertEqual(r['title'], 'Koyomimonogatari') + self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4c19eb46b5113685fbd2288ed2531b0b') + self.assertEqual(r['filesize'], int(1024 * 1024 * 10.5)) + self.assertEqual(r['publishedDate'], datetime(2016, 03, 26, 16, 41)) + self.assertEqual(r['content'], 'Comment: sample comment') + self.assertEqual(r['seed'], 53) + self.assertEqual(r['leech'], 18) + + # testing the second result, which does not include magnet link, + # seed & leech info, and has incorrect size & creation date + r = results[1] + self.assertEqual(r['url'], 'http://google.com/q') + self.assertEqual(r['title'], 'Owarimonogatari') + + self.assertFalse('magnetlink' in r) + self.assertFalse('filesize' in r) + self.assertFalse('content' in r) + self.assertFalse('publishedDate' in r) + self.assertFalse('seed' in r) + self.assertFalse('leech' in r) diff --git a/tests/unit/engines/test_torrentz.py b/tests/unit/engines/test_torrentz.py new file mode 100644 index 000000000..2f836f73e --- /dev/null +++ b/tests/unit/engines/test_torrentz.py @@ -0,0 +1,91 @@ +import mock +from collections import defaultdict +from searx.engines import torrentz +from searx.testing import SearxTestCase +from datetime import datetime + + +class TestTorrentzEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + dic['pageno'] = 1 + params = torrentz.request(query, dic) 
+ self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('torrentz.eu' in params['url']) + + def test_response(self): + resp = mock.Mock(text='<html></html>') + self.assertEqual(torrentz.response(resp), []) + + html = """ + <div class="results"> + <dl> + <dt> + <a href="/4362e08b1d80e1820fb2550b752f9f3126fe76d6"> + Completely valid info + </a> + books ebooks + </dt> + <dd> + <span class="v">1</span> + <span class="a"> + <span title="Sun, 22 Nov 2015 03:01:42">4 months</span> + </span> + <span class="s">30 MB</span> + <span class="u">14</span> + <span class="d">1</span> + </dd> + </dl> + + <dl> + <dt> + <a href="/poaskdpokaspod"> + Invalid hash and date and filesize + </a> + books ebooks + </dt> + <dd> + <span class="v">1</span> + <span class="a"> + <span title="Sun, 2124091j0j190gm42">4 months</span> + </span> + <span class="s">30MB</span> + <span class="u">5,555</span> + <span class="d">1,234,567</span> + </dd> + </dl> + </div> + """ + + resp = mock.Mock(text=html) + results = torrentz.response(resp) + + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + + # testing against the first result + r = results[0] + self.assertEqual(r['url'], 'https://torrentz.eu/4362e08b1d80e1820fb2550b752f9f3126fe76d6') + self.assertEqual(r['title'], 'Completely valid info books ebooks') + # 22 Nov 2015 03:01:42 + self.assertEqual(r['publishedDate'], datetime(2015, 11, 22, 3, 1, 42)) + self.assertEqual(r['seed'], 14) + self.assertEqual(r['leech'], 1) + self.assertEqual(r['filesize'], 30 * 1024 * 1024) + self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4362e08b1d80e1820fb2550b752f9f3126fe76d6') + + # testing against the second result + r = results[1] + self.assertEqual(r['url'], 'https://torrentz.eu/poaskdpokaspod') + self.assertEqual(r['title'], 'Invalid hash and date and filesize books ebooks') + self.assertEqual(r['seed'], 5555) + self.assertEqual(r['leech'], 1234567) + + # in the second result we have invalid hash, creation date & torrent size, + # so these tests should fail + self.assertFalse('magnetlink' in r) + self.assertFalse('filesize' in r) + self.assertFalse('publishedDate' in r) diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py new file mode 100644 index 000000000..d1c44036d --- /dev/null +++ b/tests/unit/engines/test_wikipedia.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import wikipedia +from searx.testing import SearxTestCase + + +class TestWikipediaEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['language'] = 'fr_FR' + params = wikipedia.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('test_query', params['url']) + self.assertIn('Test_Query', params['url']) + self.assertIn('fr.wikipedia.org', params['url']) + + query = 'Test_Query' + params = wikipedia.request(query, dicto) + self.assertIn('Test_Query', params['url']) + self.assertNotIn('test_query', params['url']) + + dicto['language'] = 'all' + params = wikipedia.request(query, dicto) + self.assertIn('en', params['url']) + + def test_response(self): + dicto = defaultdict(dict) + dicto['language'] = 'fr' + + self.assertRaises(AttributeError, wikipedia.response, None) + self.assertRaises(AttributeError, wikipedia.response, []) + self.assertRaises(AttributeError, wikipedia.response, '') + self.assertRaises(AttributeError, wikipedia.response, '[]') + + 
# page not found + json = """ + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "-1": { + "ns": 0, + "title": "", + "missing": "" + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + self.assertEqual(wikipedia.response(response), []) + + # normal case + json = """ + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "12345": { + "pageid": 12345, + "ns": 0, + "title": "The Title", + "extract": "The Title is...", + "thumbnail": { + "source": "img_src.jpg" + }, + "pageimage": "img_name.jpg" + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + results = wikipedia.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['title'], u'The Title') + self.assertIn('fr.wikipedia.org/wiki/The_Title', results[0]['url']) + self.assertEqual(results[1]['infobox'], u'The Title') + self.assertIn('fr.wikipedia.org/wiki/The_Title', results[1]['id']) + self.assertIn('The Title is...', results[1]['content']) + self.assertEqual(results[1]['img_src'], 'img_src.jpg') + + # disambiguation page + json = """ + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "12345": { + "pageid": 12345, + "ns": 0, + "title": "The Title", + "extract": "The Title can be:\\nThe Title 1\\nThe Title 2\\nThe Title 3\\nThe Title 4......................................................................................................................................." """ # noqa + json += """ + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + results = wikipedia.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + # no image + json = """ + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "12345": { + "pageid": 12345, + "ns": 0, + "title": "The Title", + "extract": "The Title is......................................................................................................................................................................................." 
""" # noqa + json += """ + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + results = wikipedia.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertIn('The Title is...', results[1]['content']) + self.assertEqual(results[1]['img_src'], None) + + # title not in first paragraph + json = u""" + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "12345": { + "pageid": 12345, + "ns": 0, + "title": "披頭四樂隊", + "extract": "披头士乐队....................................................................................................................................................................................................\\n披頭四樂隊...", """ # noqa + json += """ + "thumbnail": { + "source": "img_src.jpg" + }, + "pageimage": "img_name.jpg" + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + results = wikipedia.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[1]['infobox'], u'披頭四樂隊') + self.assertIn(u'披头士乐队...', results[1]['content']) diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py new file mode 100644 index 000000000..e418c0af4 --- /dev/null +++ b/tests/unit/test_preferences.py @@ -0,0 +1,101 @@ +from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, + MultipleChoiceSetting, PluginsSetting, ValidationException) +from searx.testing import SearxTestCase + + +class PluginStub(object): + def __init__(self, id, default_on): + self.id = id + self.default_on = default_on + + +class TestSettings(SearxTestCase): + # map settings + def test_map_setting_invalid_initialization(self): + with self.assertRaises(MissingArgumentException): + setting = MapSetting(3, wrong_argument={'0': 0}) + + def test_map_setting_invalid_default_value(self): + with self.assertRaises(ValidationException): + setting = MapSetting(3, map={'dog': 1, 'bat': 2}) + + def test_map_setting_invalid_choice(self): + setting = MapSetting(2, map={'dog': 1, 'bat': 2}) + with self.assertRaises(ValidationException): + setting.parse('cat') + + def test_map_setting_valid_default(self): + setting = MapSetting(3, map={'dog': 1, 'bat': 2, 'cat': 3}) + self.assertEquals(setting.get_value(), 3) + + def test_map_setting_valid_choice(self): + setting = MapSetting(3, map={'dog': 1, 'bat': 2, 'cat': 3}) + self.assertEquals(setting.get_value(), 3) + setting.parse('bat') + self.assertEquals(setting.get_value(), 2) + + def test_enum_setting_invalid_initialization(self): + with self.assertRaises(MissingArgumentException): + setting = EnumStringSetting('cat', wrong_argument=[0, 1, 2]) + + # enum settings + def test_enum_setting_invalid_initialization(self): + with self.assertRaises(MissingArgumentException): + setting = EnumStringSetting('cat', wrong_argument=[0, 1, 2]) + + def test_enum_setting_invalid_default_value(self): + with self.assertRaises(ValidationException): + setting = EnumStringSetting(3, choices=[0, 1, 2]) + + def test_enum_setting_invalid_choice(self): + setting = EnumStringSetting(0, choices=[0, 1, 2]) + with self.assertRaises(ValidationException): + setting.parse(3) + + def test_enum_setting_valid_default(self): + setting = EnumStringSetting(3, choices=[1, 2, 3]) + self.assertEquals(setting.get_value(), 3) + + def test_enum_setting_valid_choice(self): + setting = EnumStringSetting(3, choices=[1, 2, 3]) + self.assertEquals(setting.get_value(), 3) + setting.parse(2) + 
self.assertEquals(setting.get_value(), 2) + + # multiple choice settings + def test_multiple_setting_invalid_initialization(self): + with self.assertRaises(MissingArgumentException): + setting = MultipleChoiceSetting(['2'], wrong_argument=['0', '1', '2']) + + def test_multiple_setting_invalid_default_value(self): + with self.assertRaises(ValidationException): + setting = MultipleChoiceSetting(['3', '4'], choices=['0', '1', '2']) + + def test_multiple_setting_invalid_choice(self): + setting = MultipleChoiceSetting(['1', '2'], choices=['0', '1', '2']) + with self.assertRaises(ValidationException): + setting.parse('4, 3') + + def test_multiple_setting_valid_default(self): + setting = MultipleChoiceSetting(['3'], choices=['1', '2', '3']) + self.assertEquals(setting.get_value(), ['3']) + + def test_multiple_setting_valid_choice(self): + setting = MultipleChoiceSetting(['3'], choices=['1', '2', '3']) + self.assertEquals(setting.get_value(), ['3']) + setting.parse('2') + self.assertEquals(setting.get_value(), ['2']) + + # plugins settings + def test_plugins_setting_all_default_enabled(self): + plugin1 = PluginStub('plugin1', True) + plugin2 = PluginStub('plugin2', True) + setting = PluginsSetting(['3'], choices=[plugin1, plugin2]) + self.assertEquals(setting.get_enabled(), set(['plugin1', 'plugin2'])) + + def test_plugins_setting_few_default_enabled(self): + plugin1 = PluginStub('plugin1', True) + plugin2 = PluginStub('plugin2', False) + plugin3 = PluginStub('plugin3', True) + setting = PluginsSetting('name', choices=[plugin1, plugin2, plugin3]) + self.assertEquals(setting.get_enabled(), set(['plugin1', 'plugin3'])) diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py index 071c01df3..5697017d9 100644 --- a/tests/unit/test_webapp.py +++ b/tests/unit/test_webapp.py @@ -12,7 +12,6 @@ class ViewsTestCase(SearxTestCase): def setUp(self): webapp.app.config['TESTING'] = True # to get better error messages self.app = webapp.app.test_client() - webapp.default_theme = 'default' # set some defaults self.test_results = [ @@ -43,6 +42,11 @@ class ViewsTestCase(SearxTestCase): webapp.Search.search = search_mock + def get_current_theme_name_mock(override=None): + return 'default' + + webapp.get_current_theme_name = get_current_theme_name_mock + self.maxDiff = None # to see full diffs def test_index_empty(self): |
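
[editor's note] Taken together, the webapp.py and tests/unit/test_preferences.py hunks replace the ad-hoc per-cookie handling with a single Preferences object built from typed settings (MapSetting, EnumStringSetting, MultipleChoiceSetting, PluginsSetting). A condensed sketch of the request flow, using only calls that appear in the diff; themes, categories, engines, plugins, request and resp are assumed to be webapp.py's module-level names, so this is an illustration of the API rather than standalone code:

    from searx.preferences import Preferences

    # built once per request in pre_request(), populated from the cookie jar
    preferences = Preferences(themes, categories.keys(), engines, plugins)
    preferences.parse_cookies(request.cookies)
    request.preferences = preferences

    # reads replace the old request.cookies.get(...) lookups
    theme = request.preferences.get_value('theme')
    disabled_engines = request.preferences.engines.get_disabled()
    allowed_plugins = request.preferences.plugins.get_enabled()

    # on POST /preferences: validate the submitted form, then persist all
    # settings onto the redirect response in one place
    request.preferences.parse_form(request.form)  # may raise ValidationException
    resp = request.preferences.save(resp)
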