diff options
Diffstat (limited to 'searx')
33 files changed, 1524 insertions, 230 deletions
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 26830a167..b0ffb490a 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -9,13 +9,13 @@ categories = [] url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' weight = 100 -parser_re = re.compile(u'^\W*(\d+(?:\.\d+)?)\W*([^.0-9].+)\W+in?\W+([^\.]+)\W*$', re.I) # noqa +parser_re = re.compile(u'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa db = 1 def normalize_name(name): - name = name.lower().replace('-', ' ') + name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() diff --git a/searx/engines/doku.py b/searx/engines/doku.py new file mode 100644 index 000000000..93867fd0d --- /dev/null +++ b/searx/engines/doku.py @@ -0,0 +1,84 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'http://localhost:8090' +search_url = '/?do=search'\ + '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'id': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): + try: + res_url = 
r.xpath('.//a[@class="wikilink1"]/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + + # append result + results.append({'title': title, + 'content': "", + 'url': base_url + res_url}) + + # Search results + for r in doc.xpath('//dl[@class="search_results"]/*'): + try: + if r.tag == "dt": + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + elif r.tag == "dd": + content = extract_text(r.xpath('.')) + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url}) + except: + continue + + if not res_url: + continue + + # return results + return results diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 793e97d22..208ccca28 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,5 +1,6 @@ import json from urllib import urlencode +from re import compile, sub from lxml import html from searx.utils import html_to_text from searx.engines.xpath import extract_text @@ -7,6 +8,8 @@ from searx.engines.xpath import extract_text url = 'https://api.duckduckgo.com/'\ + '?{query}&format=json&pretty=0&no_redirect=1&d=1' +http_regex = compile(r'^http:') + def result_to_text(url, text, htmlResult): # TODO : remove result ending with "Meaning" or "Category" @@ -19,8 +22,8 @@ def result_to_text(url, text, htmlResult): def request(query, params): - # TODO add kl={locale} params['url'] = url.format(query=urlencode({'q': query})) + params['headers']['Accept-Language'] = params['language'] return params @@ -103,6 +106,10 @@ def response(resp): urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL}) + # to merge with wikidata's infobox + if infobox_id: + infobox_id = http_regex.sub('https:', infobox_id) + # entity entity = search_res.get('Entity', None) # TODO continent / 
country / department / location / waterfall / diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py new file mode 100644 index 000000000..0b16773e3 --- /dev/null +++ b/searx/engines/fdroid.py @@ -0,0 +1,53 @@ +""" + F-Droid (a repository of FOSS applications for Android) + + @website https://f-droid.org/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content +""" + +from cgi import escape +from urllib import urlencode +from searx.engines.xpath import extract_text +from lxml import html + +# engine dependent config +categories = ['files'] +paging = True + +# search-url +base_url = 'https://f-droid.org/' +search_url = base_url + 'repository/browse/?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'fdfilter': query, + 'fdpage': params['pageno']}) + params['url'] = search_url.format(query=query) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for app in dom.xpath('//div[@id="appheader"]'): + url = app.xpath('./ancestor::a/@href')[0] + title = app.xpath('./p/span/text()')[0] + img_src = app.xpath('.//img/@src')[0] + + content = extract_text(app.xpath('./p')[0]) + content = escape(content.replace(title, '', 1).strip()) + + results.append({'url': url, + 'title': title, + 'content': content, + 'img_src': img_src}) + + return results diff --git a/searx/engines/google.py b/searx/engines/google.py index 313932200..6018ad1b2 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -46,11 +46,11 @@ country_to_hostname = { 'NZ': 'www.google.co.nz', # New Zealand 'PH': 'www.google.com.ph', # Philippines 'SG': 'www.google.com.sg', # Singapore - # 'US': 'www.google.us', # United State, redirect to .com + # 'US': 'www.google.us', # United States, redirect to .com 'ZA': 'www.google.co.za', # South Africa 'AR': 'www.google.com.ar', # Argentina 'CL': 'www.google.cl', # Chile - 'ES': 
'www.google.es', # Span + 'ES': 'www.google.es', # Spain 'MX': 'www.google.com.mx', # Mexico 'EE': 'www.google.ee', # Estonia 'FI': 'www.google.fi', # Finland @@ -61,7 +61,7 @@ country_to_hostname = { 'HU': 'www.google.hu', # Hungary 'IT': 'www.google.it', # Italy 'JP': 'www.google.co.jp', # Japan - 'KR': 'www.google.co.kr', # South Korean + 'KR': 'www.google.co.kr', # South Korea 'LT': 'www.google.lt', # Lithuania 'LV': 'www.google.lv', # Latvia 'NO': 'www.google.no', # Norway @@ -76,9 +76,9 @@ country_to_hostname = { 'SE': 'www.google.se', # Sweden 'TH': 'www.google.co.th', # Thailand 'TR': 'www.google.com.tr', # Turkey - 'UA': 'www.google.com.ua', # Ikraine - # 'CN': 'www.google.cn', # China, only from china ? - 'HK': 'www.google.com.hk', # Hong kong + 'UA': 'www.google.com.ua', # Ukraine + # 'CN': 'www.google.cn', # China, only from China ? + 'HK': 'www.google.com.hk', # Hong Kong 'TW': 'www.google.com.tw' # Taiwan } diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 9d51428cc..efe46812a 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -49,8 +49,6 @@ def response(resp): # parse results for result in dom.xpath('//div[@data-ved]'): - data_url = result.xpath('./a/@href')[0] - data_query = {k: v[0] for k, v in parse_qs(data_url.split('?', 1)[1]).iteritems()} metadata = loads(result.xpath('./div[@class="rg_meta"]/text()')[0]) @@ -60,11 +58,11 @@ def response(resp): thumbnail_src = thumbnail_src.replace("http://", "https://") # append result - results.append({'url': data_query['imgrefurl'], + results.append({'url': metadata['ru'], 'title': metadata['pt'], 'content': metadata['s'], - 'thumbnail_src': metadata['tu'], - 'img_src': data_query['imgurl'], + 'thumbnail_src': thumbnail_src, + 'img_src': metadata['ou'], 'template': 'images.html'}) # return results diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py new file mode 100644 index 000000000..cda8231f7 --- /dev/null +++ b/searx/engines/nyaa.py 
@@ -0,0 +1,119 @@ +""" + Nyaa.se (Anime Bittorrent tracker) + + @website http://www.nyaa.se/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content, seed, leech, torrentfile +""" + +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['files', 'images', 'videos', 'music'] +paging = True + +# search-url +base_url = 'http://www.nyaa.se/' +search_url = base_url + '?page=search&{query}&offset={offset}' + +# xpath queries +xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' +xpath_category = './/td[@class="tlisticon"]/a' +xpath_title = './/td[@class="tlistname"]/a' +xpath_torrent_file = './/td[@class="tlistdownload"]/a' +xpath_filesize = './/td[@class="tlistsize"]/text()' +xpath_seeds = './/td[@class="tlistsn"]/text()' +xpath_leeches = './/td[@class="tlistln"]/text()' +xpath_downloads = './/td[@class="tlistdn"]/text()' + + +# convert a variable to integer or return 0 if it's not a number +def int_or_zero(num): + if isinstance(num, list): + if len(num) < 1: + return 0 + num = num[0] + if num.isdigit(): + return int(num) + return 0 + + +# get multiplier to convert torrent size to bytes +def get_filesize_mul(suffix): + return { + 'KB': 1024, + 'MB': 1024 ** 2, + 'GB': 1024 ** 3, + 'TB': 1024 ** 4, + + 'KIB': 1024, + 'MIB': 1024 ** 2, + 'GIB': 1024 ** 3, + 'TIB': 1024 ** 4 + }[str(suffix).upper()] + + +# do search-request +def request(query, params): + query = urlencode({'term': query}) + params['url'] = search_url.format(query=query, offset=params['pageno']) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath(xpath_results): + # category in which our torrent belongs + category = result.xpath(xpath_category)[0].attrib.get('title') + + # torrent title + page_a = 
result.xpath(xpath_title)[0] + title = escape(extract_text(page_a)) + + # link to the page + href = page_a.attrib.get('href') + + # link to the torrent file + torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href') + + # torrent size + try: + file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') + file_size = int(float(file_size) * get_filesize_mul(suffix)) + except Exception as e: + file_size = None + + # seed count + seed = int_or_zero(result.xpath(xpath_seeds)) + + # leech count + leech = int_or_zero(result.xpath(xpath_leeches)) + + # torrent downloads count + downloads = int_or_zero(result.xpath(xpath_downloads)) + + # content string contains all information not included into template + content = 'Category: "{category}". Downloaded {downloads} times.' + content = content.format(category=category, downloads=downloads) + content = escape(content) + + results.append({'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': file_size, + 'torrentfile': torrent_link, + 'template': 'torrent.html'}) + + return results diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py new file mode 100644 index 000000000..3ca7e44f6 --- /dev/null +++ b/searx/engines/reddit.py @@ -0,0 +1,79 @@ +""" + Reddit + + @website https://www.reddit.com/ + @provide-api yes (https://www.reddit.com/dev/api) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, thumbnail, publishedDate +""" + +import json +from cgi import escape +from urllib import urlencode +from urlparse import urlparse, urljoin +from datetime import datetime + +# engine dependent config +categories = ['general', 'images', 'news', 'social media'] +page_size = 25 + +# search-url +base_url = 'https://www.reddit.com/' +search_url = base_url + 'search.json?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'q': query, + 'limit': page_size}) + params['url'] = search_url.format(query=query) + + return 
params + + +# get response from search-request +def response(resp): + img_results = [] + text_results = [] + + search_results = json.loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + posts = search_results.get('data', {}).get('children', []) + + # process results + for post in posts: + data = post['data'] + + # extract post information + params = { + 'url': urljoin(base_url, data['permalink']), + 'title': data['title'] + } + + # if thumbnail field contains a valid URL, we need to change template + thumbnail = data['thumbnail'] + url_info = urlparse(thumbnail) + # netloc & path + if url_info[1] != '' and url_info[2] != '': + params['img_src'] = data['url'] + params['thumbnail_src'] = thumbnail + params['template'] = 'images.html' + img_results.append(params) + else: + created = datetime.fromtimestamp(data['created_utc']) + content = escape(data['selftext']) + if len(content) > 500: + content = content[:500] + '...' + params['content'] = content + params['publishedDate'] = created + text_results.append(params) + + # show images first and text results second + return img_results + text_results diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py new file mode 100644 index 000000000..17e8e2191 --- /dev/null +++ b/searx/engines/tokyotoshokan.py @@ -0,0 +1,102 @@ +""" + Tokyo Toshokan (A BitTorrent Library for Japanese Media) + + @website https://www.tokyotosho.info/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, publishedDate, seed, leech, + filesize, magnetlink, content +""" + +import re +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text +from datetime import datetime +from searx.engines.nyaa import int_or_zero, get_filesize_mul + +# engine dependent config +categories = ['files', 'videos', 'music'] +paging = True + +# search-url +base_url = 
'https://www.tokyotosho.info/' +search_url = base_url + 'search.php?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'page': params['pageno'], + 'terms': query}) + params['url'] = search_url.format(query=query) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + rows = dom.xpath('//table[@class="listing"]//tr[contains(@class, "category_0")]') + + # check if there are no results or page layout was changed so we cannot parse it + # currently there are two rows for each result, so total count must be even + if len(rows) == 0 or len(rows) % 2 != 0: + return [] + + # regular expression for parsing torrent size strings + size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) + + # processing the results, two rows at a time + for i in xrange(0, len(rows), 2): + # parse the first row + name_row = rows[i] + + links = name_row.xpath('./td[@class="desc-top"]/a') + params = { + 'template': 'torrent.html', + 'url': links[-1].attrib.get('href'), + 'title': extract_text(links[-1]) + } + # I have not yet seen any torrents without magnet links, but + # it's better to be prepared to stumble upon one some day + if len(links) == 2: + magnet = links[0].attrib.get('href') + if magnet.startswith('magnet'): + # okay, we have a valid magnet link, let's add it to the result + params['magnetlink'] = magnet + + # no more info in the first row, start parsing the second one + info_row = rows[i + 1] + desc = extract_text(info_row.xpath('./td[@class="desc-bot"]')[0]) + for item in desc.split('|'): + item = item.strip() + if item.startswith('Size:'): + try: + # ('1.228', 'GB') + groups = size_re.match(item).groups() + multiplier = get_filesize_mul(groups[1]) + params['filesize'] = int(multiplier * float(groups[0])) + except Exception as e: + pass + elif item.startswith('Date:'): + try: + # Date: 2016-02-21 21:44 UTC + date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M 
UTC') + params['publishedDate'] = date + except Exception as e: + pass + elif item.startswith('Comment:'): + params['content'] = item + stats = info_row.xpath('./td[@class="stats"]/span') + # has the layout not changed yet? + if len(stats) == 3: + params['seed'] = int_or_zero(extract_text(stats[0])) + params['leech'] = int_or_zero(extract_text(stats[1])) + + results.append(params) + + return results diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py new file mode 100644 index 000000000..92fbe7013 --- /dev/null +++ b/searx/engines/torrentz.py @@ -0,0 +1,93 @@ +""" + Torrentz.eu (BitTorrent meta-search engine) + + @website https://torrentz.eu/ + @provide-api no + + @using-api no + @results HTML + @stable no (HTML can change, although unlikely, + see https://torrentz.eu/torrentz.btsearch) + @parse url, title, publishedDate, seed, leech, filesize, magnetlink +""" + +import re +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text +from datetime import datetime +from searx.engines.nyaa import int_or_zero, get_filesize_mul + +# engine dependent config +categories = ['files', 'videos', 'music'] +paging = True + +# search-url +# https://torrentz.eu/search?f=EXAMPLE&p=6 +base_url = 'https://torrentz.eu/' +search_url = base_url + 'search?{query}' + + +# do search-request +def request(query, params): + page = params['pageno'] - 1 + query = urlencode({'q': query, 'p': page}) + params['url'] = search_url.format(query=query) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath('//div[@class="results"]/dl'): + name_cell = result.xpath('./dt')[0] + title = extract_text(name_cell) + + # skip rows that do not contain a link to a torrent + links = name_cell.xpath('./a') + if len(links) != 1: + continue + + # extract url and remove a slash in the beginning + link = 
links[0].attrib.get('href').lstrip('/') + + seed = result.xpath('./dd/span[@class="u"]/text()')[0].replace(',', '') + leech = result.xpath('./dd/span[@class="d"]/text()')[0].replace(',', '') + + params = { + 'url': base_url + link, + 'title': title, + 'seed': int_or_zero(seed), + 'leech': int_or_zero(leech), + 'template': 'torrent.html' + } + + # let's try to calculate the torrent size + try: + size_str = result.xpath('./dd/span[@class="s"]/text()')[0] + size, suffix = size_str.split() + params['filesize'] = int(size) * get_filesize_mul(suffix) + except Exception as e: + pass + + # does our link contain a valid SHA1 sum? + if re.compile('[0-9a-fA-F]{40}').match(link): + # add a magnet link to the result + params['magnetlink'] = 'magnet:?xt=urn:btih:' + link + + # extract and convert creation date + try: + date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title') + # Fri, 25 Mar 2016 16:29:01 + date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S') + params['publishedDate'] = date + except Exception as e: + pass + + results.append(params) + + return results diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 9f3496b72..8aa2fcd5c 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -86,15 +86,15 @@ def getDetail(jsonresponse, wikidata_id, language, locale): results.append({'title': title, 'url': official_website}) wikipedia_link_count = 0 + wikipedia_link = get_wikilink(result, language + 'wiki') + wikipedia_link_count += add_url(urls, + 'Wikipedia (' + language + ')', + wikipedia_link) if language != 'en': + wikipedia_en_link = get_wikilink(result, 'enwiki') wikipedia_link_count += add_url(urls, - 'Wikipedia (' + language + ')', - get_wikilink(result, language + - 'wiki')) - wikipedia_en_link = get_wikilink(result, 'enwiki') - wikipedia_link_count += add_url(urls, - 'Wikipedia (en)', - wikipedia_en_link) + 'Wikipedia (en)', + wikipedia_en_link) if wikipedia_link_count == 0: misc_language = 
get_wiki_firstlanguage(result, 'wiki') if misc_language is not None: @@ -188,7 +188,7 @@ def getDetail(jsonresponse, wikidata_id, language, locale): else: results.append({ 'infobox': title, - 'id': wikipedia_en_link, + 'id': wikipedia_link, 'content': description, 'attributes': attributes, 'urls': urls diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py new file mode 100644 index 000000000..fed7b263f --- /dev/null +++ b/searx/engines/wikipedia.py @@ -0,0 +1,114 @@ +""" + Wikipedia (Web) + + @website https://{language}.wikipedia.org + @provide-api yes + + @using-api yes + @results JSON + @stable yes + @parse url, infobox +""" + +from json import loads +from urllib import urlencode, quote + +# search-url +base_url = 'https://{language}.wikipedia.org/' +search_postfix = 'w/api.php?'\ + 'action=query'\ + '&format=json'\ + '&{query}'\ + '&prop=extracts|pageimages'\ + '&exintro'\ + '&explaintext'\ + '&pithumbsize=300'\ + '&redirects' + + +# set language in base_url +def url_lang(lang): + if lang == 'all': + language = 'en' + else: + language = lang.split('_')[0] + + return base_url.format(language=language) + + +# do search-request +def request(query, params): + if query.islower(): + query += '|' + query.title() + + params['url'] = url_lang(params['language']) \ + + search_postfix.format(query=urlencode({'titles': query})) + + return params + + +# get first meaningful paragraph +# this should filter out disambiguation pages and notes above first paragraph +# "magic numbers" were obtained by fine tuning +def extract_first_paragraph(content, title, image): + first_paragraph = None + + failed_attempts = 0 + for paragraph in content.split('\n'): + + starts_with_title = paragraph.lower().find(title.lower(), 0, len(title) + 35) + length = len(paragraph) + + if length >= 200 or (starts_with_title >= 0 and (image or length >= 150)): + first_paragraph = paragraph + break + + failed_attempts += 1 + if failed_attempts > 3: + return None + + return first_paragraph + 
+ +# get response from search-request +def response(resp): + results = [] + + search_result = loads(resp.content) + + # wikipedia article's unique id + # first valid id is assumed to be the requested article + for article_id in search_result['query']['pages']: + page = search_result['query']['pages'][article_id] + if int(article_id) > 0: + break + + if int(article_id) < 0: + return [] + + title = page.get('title') + + image = page.get('thumbnail') + if image: + image = image.get('source') + + extract = page.get('extract') + + summary = extract_first_paragraph(extract, title, image) + if not summary: + return [] + + # link to wikipedia article + # parenthesis are not quoted to make infobox mergeable with wikidata's + wikipedia_link = url_lang(resp.search_params['language']) \ + + 'wiki/' + quote(title.replace(' ', '_').encode('utf8')).replace('%28', '(').replace('%29', ')') + + results.append({'url': wikipedia_link, 'title': title}) + + results.append({'infobox': title, + 'id': wikipedia_link, + 'content': summary, + 'img_src': image, + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]}) + + return results diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index f51634be0..e701c02bf 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -11,6 +11,14 @@ title_xpath = None suggestion_xpath = '' results_xpath = '' +# parameters for engines with paging support +# +# number of results on each page +# (only needed if the site requires not a page number, but an offset) +page_size = 1 +# number of the first page (usually 0 or 1) +first_page_num = 1 + ''' if xpath_results is list, extract the text from each result and concat the list @@ -76,8 +84,14 @@ def normalize_url(url): def request(query, params): query = urlencode({'q': query})[2:] - params['url'] = search_url.format(query=query) + + fp = {'query': query} + if paging and search_url.find('{pageno}') >= 0: + fp['pageno'] = (params['pageno'] + first_page_num - 1) * page_size + + params['url'] 
= search_url.format(**fp) params['query'] = query + return params diff --git a/searx/languages.py b/searx/languages.py index b67da9d22..70459a577 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -20,10 +20,10 @@ language_codes = ( ("ar_XA", "Arabic", "Arabia"), ("bg_BG", "Bulgarian", "Bulgaria"), ("cs_CZ", "Czech", "Czech Republic"), - ("de_DE", "German", "Germany"), ("da_DK", "Danish", "Denmark"), ("de_AT", "German", "Austria"), ("de_CH", "German", "Switzerland"), + ("de_DE", "German", "Germany"), ("el_GR", "Greek", "Greece"), ("en_AU", "English", "Australia"), ("en_CA", "English", "Canada"), @@ -58,10 +58,10 @@ language_codes = ( ("ko_KR", "Korean", "Korea"), ("lt_LT", "Lithuanian", "Lithuania"), ("lv_LV", "Latvian", "Latvia"), - ("oc_OC", "Occitan", "Occitan"), ("nb_NO", "Norwegian", "Norway"), ("nl_BE", "Dutch", "Belgium"), ("nl_NL", "Dutch", "Netherlands"), + ("oc_OC", "Occitan", "Occitan"), ("pl_PL", "Polish", "Poland"), ("pt_BR", "Portuguese", "Brazil"), ("pt_PT", "Portuguese", "Portugal"), diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 87cc01382..efb9b0682 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -23,7 +23,8 @@ from searx.plugins import (https_rewrite, open_results_on_new_tab, self_info, search_on_category_select, - tracker_url_remover) + tracker_url_remover, + vim_hotkeys) required_attrs = (('name', str), ('description', str), @@ -77,3 +78,4 @@ plugins.register(open_results_on_new_tab) plugins.register(self_info) plugins.register(search_on_category_select) plugins.register(tracker_url_remover) +plugins.register(vim_hotkeys) diff --git a/searx/plugins/vim_hotkeys.py b/searx/plugins/vim_hotkeys.py new file mode 100644 index 000000000..e537a3ac8 --- /dev/null +++ b/searx/plugins/vim_hotkeys.py @@ -0,0 +1,10 @@ +from flask.ext.babel import gettext + +name = gettext('Vim-like hotkeys') +description = gettext('Navigate search results with Vim-like hotkeys ' + '(JavaScript required). 
' + 'Press "h" key on main or result page to get help.') +default_on = False + +js_dependencies = ('plugins/js/vim_hotkeys.js',) +css_dependencies = ('plugins/css/vim_hotkeys.css',) diff --git a/searx/preferences.py b/searx/preferences.py new file mode 100644 index 000000000..a87cd5029 --- /dev/null +++ b/searx/preferences.py @@ -0,0 +1,271 @@ +from searx import settings, autocomplete +from searx.languages import language_codes as languages + + +COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years +LANGUAGE_CODES = [l[0] for l in languages] +LANGUAGE_CODES.append('all') +DISABLED = 0 +ENABLED = 1 + + +class MissingArgumentException(Exception): + pass + + +class ValidationException(Exception): + pass + + +class Setting(object): + """Base class of user settings""" + + def __init__(self, default_value, **kwargs): + super(Setting, self).__init__() + self.value = default_value + for key, value in kwargs.iteritems(): + setattr(self, key, value) + + self._post_init() + + def _post_init(self): + pass + + def parse(self, data): + self.value = data + + def get_value(self): + return self.value + + def save(self, name, resp): + resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE) + + +class StringSetting(Setting): + """Setting of plain string values""" + pass + + +class EnumStringSetting(Setting): + """Setting of a value which can only come from the given choices""" + + def _post_init(self): + if not hasattr(self, 'choices'): + raise MissingArgumentException('Missing argument: choices') + + if self.value != '' and self.value not in self.choices: + raise ValidationException('Invalid default value: {0}'.format(self.value)) + + def parse(self, data): + if data not in self.choices and data != self.value: + raise ValidationException('Invalid choice: {0}'.format(data)) + self.value = data + + +class MultipleChoiceSetting(EnumStringSetting): + """Setting of values which can only come from the given choices""" + + def _post_init(self): + if not hasattr(self, 'choices'): + 
raise MissingArgumentException('Missing argument: choices') + for item in self.value: + if item not in self.choices: + raise ValidationException('Invalid default value: {0}'.format(self.value)) + + def parse(self, data): + if data == '': + self.value = [] + return + + elements = data.split(',') + for item in elements: + if item not in self.choices: + raise ValidationException('Invalid choice: {0}'.format(item)) + self.value = elements + + def parse_form(self, data): + self.value = [] + for choice in data: + if choice in self.choices and choice not in self.value: + self.value.append(choice) + + def save(self, name, resp): + resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) + + +class MapSetting(Setting): + """Setting of a value that has to be translated in order to be storable""" + + def _post_init(self): + if not hasattr(self, 'map'): + raise MissingArgumentException('missing argument: map') + if self.value not in self.map.values(): + raise ValidationException('Invalid default value') + + def parse(self, data): + if data not in self.map: + raise ValidationException('Invalid choice: {0}'.format(data)) + self.value = self.map[data] + self.key = data + + def save(self, name, resp): + resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE) + + +class SwitchableSetting(Setting): + """ Base class for settings that can be turned on && off""" + + def _post_init(self): + self.disabled = set() + self.enabled = set() + if not hasattr(self, 'choices'): + raise MissingArgumentException('missing argument: choices') + + def transform_form_items(self, items): + return items + + def transform_values(self, values): + return values + + def parse_cookie(self, data): + if data[DISABLED] != '': + self.disabled = set(data[DISABLED].split(',')) + if data[ENABLED] != '': + self.enabled = set(data[ENABLED].split(',')) + + def parse_form(self, items): + items = self.transform_form_items(items) + + self.disabled = set() + self.enabled = set() + for choice in 
self.choices: + if choice['default_on']: + if choice['id'] in items: + self.disabled.add(choice['id']) + else: + if choice['id'] not in items: + self.enabled.add(choice['id']) + + def save(self, resp): + resp.set_cookie('disabled_{0}'.format(self.value), ','.join(self.disabled), max_age=COOKIE_MAX_AGE) + resp.set_cookie('enabled_{0}'.format(self.value), ','.join(self.enabled), max_age=COOKIE_MAX_AGE) + + def get_disabled(self): + disabled = self.disabled + for choice in self.choices: + if not choice['default_on'] and choice['id'] not in self.enabled: + disabled.add(choice['id']) + return self.transform_values(disabled) + + def get_enabled(self): + enabled = self.enabled + for choice in self.choices: + if choice['default_on'] and choice['id'] not in self.disabled: + enabled.add(choice['id']) + return self.transform_values(enabled) + + +class EnginesSetting(SwitchableSetting): + def _post_init(self): + super(EnginesSetting, self)._post_init() + transformed_choices = [] + for engine_name, engine in self.choices.iteritems(): + for category in engine.categories: + transformed_choice = dict() + transformed_choice['default_on'] = not engine.disabled + transformed_choice['id'] = '{}__{}'.format(engine_name, category) + transformed_choices.append(transformed_choice) + self.choices = transformed_choices + + def transform_form_items(self, items): + return [item[len('engine_'):].replace('_', ' ').replace(' ', '__') for item in items] + + def transform_values(self, values): + if len(values) == 1 and next(iter(values)) == '': + return list() + transformed_values = [] + for value in values: + engine, category = value.split('__') + transformed_values.append((engine, category)) + return transformed_values + + +class PluginsSetting(SwitchableSetting): + def _post_init(self): + super(PluginsSetting, self)._post_init() + transformed_choices = [] + for plugin in self.choices: + transformed_choice = dict() + transformed_choice['default_on'] = plugin.default_on + transformed_choice['id'] 
= plugin.id + transformed_choices.append(transformed_choice) + self.choices = transformed_choices + + def transform_form_items(self, items): + return [item[len('plugin_'):] for item in items] + + +class Preferences(object): + """Stores, validates and saves preferences to cookies""" + + def __init__(self, themes, categories, engines, plugins): + super(Preferences, self).__init__() + + self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories), + 'language': EnumStringSetting('all', choices=LANGUAGE_CODES), + 'locale': EnumStringSetting(settings['ui']['default_locale'], + choices=settings['locales'].keys()), + 'autocomplete': EnumStringSetting(settings['search']['autocomplete'], + choices=autocomplete.backends.keys()), + 'image_proxy': MapSetting(settings['server']['image_proxy'], + map={'': settings['server']['image_proxy'], + '0': False, + '1': True}), + 'method': EnumStringSetting('POST', choices=('GET', 'POST')), + 'safesearch': MapSetting(settings['search']['safe_search'], map={'0': 0, + '1': 1, + '2': 2}), + 'theme': EnumStringSetting(settings['ui']['default_theme'], choices=themes)} + + self.engines = EnginesSetting('engines', choices=engines) + self.plugins = PluginsSetting('plugins', choices=plugins) + + def parse_cookies(self, input_data): + for user_setting_name, user_setting in input_data.iteritems(): + if user_setting_name in self.key_value_settings: + self.key_value_settings[user_setting_name].parse(user_setting) + elif user_setting_name == 'disabled_engines': + self.engines.parse_cookie((input_data.get('disabled_engines', ''), + input_data.get('enabled_engines', ''))) + elif user_setting_name == 'disabled_plugins': + self.plugins.parse_cookie((input_data.get('disabled_plugins', ''), + input_data.get('enabled_plugins', ''))) + + def parse_form(self, input_data): + disabled_engines = [] + enabled_categories = [] + disabled_plugins = [] + for user_setting_name, user_setting in input_data.iteritems(): + if 
user_setting_name in self.key_value_settings: + self.key_value_settings[user_setting_name].parse(user_setting) + elif user_setting_name.startswith('engine_'): + disabled_engines.append(user_setting_name) + elif user_setting_name.startswith('category_'): + enabled_categories.append(user_setting_name[len('category_'):]) + elif user_setting_name.startswith('plugin_'): + disabled_plugins.append(user_setting_name) + self.key_value_settings['categories'].parse_form(enabled_categories) + self.engines.parse_form(disabled_engines) + self.plugins.parse_form(disabled_plugins) + + # cannot be used in case of engines or plugins + def get_value(self, user_setting_name): + if user_setting_name in self.key_value_settings: + return self.key_value_settings[user_setting_name].get_value() + + def save(self, resp): + for user_setting_name, user_setting in self.key_value_settings.iteritems(): + user_setting.save(user_setting_name, resp) + self.engines.save(resp) + self.plugins.save(resp) + return resp diff --git a/searx/query.py b/searx/query.py index e79e760a3..3d617ab05 100644 --- a/searx/query.py +++ b/searx/query.py @@ -28,12 +28,12 @@ import re class Query(object): """parse query""" - def __init__(self, query, blocked_engines): + def __init__(self, query, disabled_engines): self.query = query - self.blocked_engines = [] + self.disabled_engines = [] - if blocked_engines: - self.blocked_engines = blocked_engines + if disabled_engines: + self.disabled_engines = disabled_engines self.query_parts = [] self.engines = [] @@ -107,7 +107,7 @@ class Query(object): self.engines.extend({'category': prefix, 'name': engine.name} for engine in categories[prefix] - if (engine.name, prefix) not in self.blocked_engines) + if (engine.name, prefix) not in self.disabled_engines) if query_part[0] == '!': self.specific = True diff --git a/searx/results.py b/searx/results.py index 5d51eb5b5..c3040b305 100644 --- a/searx/results.py +++ b/searx/results.py @@ -37,7 +37,7 @@ def merge_two_infoboxes(infobox1, 
infobox2): urls1 = infobox1.get('urls', None) if urls1 is None: urls1 = [] - infobox1.set('urls', urls1) + infobox1['urls'] = urls1 urlSet = set() for url in infobox1.get('urls', []): @@ -47,11 +47,17 @@ def merge_two_infoboxes(infobox1, infobox2): if url.get('url', None) not in urlSet: urls1.append(url) + if 'img_src' in infobox2: + img1 = infobox1.get('img_src', None) + img2 = infobox2.get('img_src') + if img1 is None: + infobox1['img_src'] = img2 + if 'attributes' in infobox2: attributes1 = infobox1.get('attributes', None) if attributes1 is None: attributes1 = [] - infobox1.set('attributes', attributes1) + infobox1['attributes'] = attributes1 attributeSet = set() for attribute in infobox1.get('attributes', []): @@ -68,7 +74,7 @@ def merge_two_infoboxes(infobox1, infobox2): if result_content_len(content2) > result_content_len(content1): infobox1['content'] = content2 else: - infobox1.set('content', content2) + infobox1['content'] = content2 def result_score(result): diff --git a/searx/search.py b/searx/search.py index ce41b231b..a40801640 100644 --- a/searx/search.py +++ b/searx/search.py @@ -23,7 +23,7 @@ from searx.engines import ( categories, engines ) from searx.languages import language_codes -from searx.utils import gen_useragent, get_blocked_engines +from searx.utils import gen_useragent from searx.query import Query from searx.results import ResultContainer from searx import logger @@ -140,15 +140,13 @@ class Search(object): self.lang = 'all' # set blocked engines - self.blocked_engines = get_blocked_engines(engines, request.cookies) + self.disabled_engines = request.preferences.engines.get_disabled() self.result_container = ResultContainer() self.request_data = {} # set specific language if set - if request.cookies.get('language')\ - and request.cookies['language'] in (x[0] for x in language_codes): - self.lang = request.cookies['language'] + self.lang = request.preferences.get_value('language') # set request method if request.method == 'POST': @@ -169,7 
+167,7 @@ class Search(object): # parse query, if tags are set, which change # the serch engine or search-language - query_obj = Query(self.request_data['q'], self.blocked_engines) + query_obj = Query(self.request_data['q'], self.disabled_engines) query_obj.parse_query() # set query @@ -229,8 +227,7 @@ class Search(object): # using user-defined default-configuration which # (is stored in cookie) if not self.categories: - cookie_categories = request.cookies.get('categories', '') - cookie_categories = cookie_categories.split(',') + cookie_categories = request.preferences.get_value('categories') for ccateg in cookie_categories: if ccateg in categories: self.categories.append(ccateg) @@ -246,7 +243,7 @@ class Search(object): self.engines.extend({'category': categ, 'name': engine.name} for engine in categories[categ] - if (engine.name, categ) not in self.blocked_engines) + if (engine.name, categ) not in self.disabled_engines) # remove suspended engines self.engines = [e for e in self.engines @@ -294,11 +291,8 @@ class Search(object): else: request_params['language'] = self.lang - try: - # 0 = None, 1 = Moderate, 2 = Strict - request_params['safesearch'] = int(request.cookies.get('safesearch')) - except Exception: - request_params['safesearch'] = settings['search']['safe_search'] + # 0 = None, 1 = Moderate, 2 = Strict + request_params['safesearch'] = request.preferences.get_value('safesearch') # update request parameters dependent on # search-engine (contained in engines folder) diff --git a/searx/settings.yml b/searx/settings.yml index c388f55ec..96455fc23 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -45,10 +45,9 @@ engines: shortcut : bs - name : wikipedia - engine : mediawiki + engine : wikipedia shortcut : wp base_url : 'https://{language}.wikipedia.org/' - number_of_results : 1 - name : bing engine : bing @@ -62,6 +61,18 @@ engines: engine : bing_news shortcut : bin + - name : bitbucket + engine : xpath + paging : True + search_url : 
https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath : //article[@class="repo-summary"]/p + categories : it + timeout : 4.0 + disabled : True + shortcut : bb + - name : btdigg engine : btdigg shortcut : bt @@ -83,11 +94,25 @@ engines: - name : ddg definitions engine : duckduckgo_definitions shortcut : ddd + disabled : True - name : digg engine : digg shortcut : dg + - name : erowid + engine : xpath + paging : True + first_page_num : 0 + page_size : 30 + search_url : https://www.erowid.org/search.php?q={query}&s={pageno} + url_xpath : //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath : //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath : //dl[@class="results-list"]/dd[@class="result-details"] + categories : general + shortcut : ew + disabled : True + - name : wikidata engine : wikidata shortcut : wd @@ -111,6 +136,11 @@ engines: shortcut : 1x disabled : True + - name : fdroid + engine : fdroid + shortcut : fd + disabled : True + - name : flickr categories : images shortcut : fl @@ -131,6 +161,18 @@ engines: shortcut : gb disabled: True + - name : gitlab + engine : xpath + paging : True + search_url : https://gitlab.com/search?page={pageno}&search={query} + url_xpath : //li[@class="project-row"]//a[@class="project"]/@href + title_xpath : //li[@class="project-row"]//span[contains(@class, "project-full-name")] + content_xpath : //li[@class="project-row"]//div[@class="description"]/p + categories : it + shortcut : gl + timeout : 5.0 + disabled : True + - name : github engine : github shortcut : gh @@ -177,10 +219,39 @@ engines: shortcut : gps disabled : True + - name : geektimes + engine : xpath + paging : True + search_url : https://geektimes.ru/search/page{pageno}/?q={query} + url_xpath : //div[@class="search_results"]//a[@class="post_title"]/@href + 
title_xpath : //div[@class="search_results"]//a[@class="post_title"] + content_xpath : //div[@class="search_results"]//div[contains(@class, "content")] + categories : it + timeout : 4.0 + disabled : True + shortcut : gt + + - name : habrahabr + engine : xpath + paging : True + search_url : https://habrahabr.ru/search/page{pageno}/?q={query} + url_xpath : //div[@class="search_results"]//a[@class="post_title"]/@href + title_xpath : //div[@class="search_results"]//a[@class="post_title"] + content_xpath : //div[@class="search_results"]//div[contains(@class, "content")] + categories : it + timeout : 4.0 + disabled : True + shortcut : habr + - name : mixcloud engine : mixcloud shortcut : mc + - name : nyaa + engine : nyaa + shortcut : nt + disabled : True + - name : openstreetmap engine : openstreetmap shortcut : osm @@ -215,6 +286,13 @@ engines: shortcut : qws categories : social media + - name : reddit + engine : reddit + shortcut : re + page_size : 25 + timeout : 10.0 + disabled : True + - name : kickass engine : kickass shortcut : ka @@ -266,6 +344,17 @@ engines: shortcut : sw disabled : True + - name : tokyotoshokan + engine : tokyotoshokan + shortcut : tt + timeout : 6.0 + disabled : True + + - name : torrentz + engine : torrentz + timeout : 5.0 + shortcut : to + - name : twitter engine : twitter shortcut : tw @@ -339,6 +428,13 @@ engines: # number_of_results : 5 # timeout : 3.0 +# Doku engine lets you access any Doku wiki instance: +# A public one or a private/corporate one. 
+# - name : ubuntuwiki +# engine : doku +# shortcut : uw +# base_url : 'http://doc.ubuntu-fr.org' + locales: en : English bg : Български (Bulgarian) diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml index fb193e43d..7c7c4eec2 100644 --- a/searx/settings_robot.yml +++ b/searx/settings_robot.yml @@ -4,7 +4,7 @@ general: search: safe_search : 0 - autocomplete : 0 + autocomplete : "" server: port : 11111 diff --git a/searx/static/plugins/css/vim_hotkeys.css b/searx/static/plugins/css/vim_hotkeys.css new file mode 100644 index 000000000..2ccfdc1af --- /dev/null +++ b/searx/static/plugins/css/vim_hotkeys.css @@ -0,0 +1,26 @@ +.vim-hotkeys-help { + position: fixed; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + z-index: 9999999; + overflow-y: auto; + max-height: 80%; + box-shadow: 0 0 1em; +} + +.dflex { + display: -webkit-box; /* OLD - iOS 6-, Safari 3.1-6 */ + display: -moz-box; /* OLD - Firefox 19- (buggy but mostly works) */ + display: -ms-flexbox; /* TWEENER - IE 10 */ + display: -webkit-flex; /* NEW - Chrome */ + display: flex; /* NEW, Spec - Opera 12.1, Firefox 20+ */ +} + +.iflex { + -webkit-box-flex: 1; /* OLD - iOS 6-, Safari 3.1-6 */ + -moz-box-flex: 1; /* OLD - Firefox 19- */ + -webkit-flex: 1; /* Chrome */ + -ms-flex: 1; /* IE 10 */ + flex: 1; /* NEW, Spec - Opera 12.1, Firefox 20+ */ +} diff --git a/searx/static/plugins/js/vim_hotkeys.js b/searx/static/plugins/js/vim_hotkeys.js new file mode 100644 index 000000000..61500d8f5 --- /dev/null +++ b/searx/static/plugins/js/vim_hotkeys.js @@ -0,0 +1,336 @@ +$(document).ready(function() { + highlightResult('top')(); + + $('.result').on('click', function() { + highlightResult($(this))(); + }); + + var vimKeys = { + 27: { + key: 'Escape', + fun: removeFocus, + des: 'remove focus from the focused input', + cat: 'Control' + }, + 73: { + key: 'i', + fun: searchInputFocus, + des: 'focus on the search input', + cat: 'Control' + }, + 66: { + key: 'b', + fun: scrollPage(-window.innerHeight), + 
des: 'scroll one page up', + cat: 'Navigation' + }, + 70: { + key: 'f', + fun: scrollPage(window.innerHeight), + des: 'scroll one page down', + cat: 'Navigation' + }, + 85: { + key: 'u', + fun: scrollPage(-window.innerHeight / 2), + des: 'scroll half a page up', + cat: 'Navigation' + }, + 68: { + key: 'd', + fun: scrollPage(window.innerHeight / 2), + des: 'scroll half a page down', + cat: 'Navigation' + }, + 71: { + key: 'g', + fun: scrollPageTo(-document.body.scrollHeight, 'top'), + des: 'scroll to the top of the page', + cat: 'Navigation' + }, + 86: { + key: 'v', + fun: scrollPageTo(document.body.scrollHeight, 'bottom'), + des: 'scroll to the bottom of the page', + cat: 'Navigation' + }, + 75: { + key: 'k', + fun: highlightResult('up'), + des: 'select previous search result', + cat: 'Results' + }, + 74: { + key: 'j', + fun: highlightResult('down'), + des: 'select next search result', + cat: 'Results' + }, + 80: { + key: 'p', + fun: pageButtonClick(0), + des: 'go to previous page', + cat: 'Results' + }, + 78: { + key: 'n', + fun: pageButtonClick(1), + des: 'go to next page', + cat: 'Results' + }, + 79: { + key: 'o', + fun: openResult(false), + des: 'open search result', + cat: 'Results' + }, + 84: { + key: 't', + fun: openResult(true), + des: 'open the result in a new tab', + cat: 'Results' + }, + 82: { + key: 'r', + fun: reloadPage, + des: 'reload page from the server', + cat: 'Control' + }, + 72: { + key: 'h', + fun: toggleHelp, + des: 'toggle help window', + cat: 'Other' + } + }; + + $(document).keyup(function(e) { + // check for modifiers so we don't break browser's hotkeys + if (vimKeys.hasOwnProperty(e.keyCode) + && !e.ctrlKey + && !e.altKey + && !e.shiftKey + && !e.metaKey) + { + if (e.keyCode === 27) { + if (e.target.tagName.toLowerCase() === 'input') { + vimKeys[e.keyCode].fun(); + } + } else { + if (e.target === document.body) { + vimKeys[e.keyCode].fun(); + } + } + } + }); + + function highlightResult(which) { + return function() { + var current = 
$('.result[data-vim-selected]'); + if (current.length === 0) { + current = $('.result:first'); + if (current.length === 0) { + return; + } + } + + var next; + + if (typeof which !== 'string') { + next = which; + } else { + switch (which) { + case 'visible': + var top = $(window).scrollTop(); + var bot = top + $(window).height(); + var results = $('.result'); + + for (var i = 0; i < results.length; i++) { + next = $(results[i]); + var etop = next.offset().top; + var ebot = etop + next.height(); + + if ((ebot <= bot) && (etop > top)) { + break; + } + } + break; + case 'down': + next = current.next('.result'); + if (next.length === 0) { + next = $('.result:first'); + } + break; + case 'up': + next = current.prev('.result'); + if (next.length === 0) { + next = $('.result:last'); + } + break; + case 'bottom': + next = $('.result:last'); + break; + case 'top': + default: + next = $('.result:first'); + } + } + + if (next) { + current.removeAttr('data-vim-selected').removeClass('well well-sm'); + next.attr('data-vim-selected', 'true').addClass('well well-sm'); + scrollPageToSelected(); + } + } + } + + function reloadPage() { + document.location.reload(false); + } + + function removeFocus() { + if (document.activeElement) { + document.activeElement.blur(); + } + } + + function pageButtonClick(num) { + return function() { + var buttons = $('div#pagination button[type="submit"]'); + if (buttons.length !== 2) { + console.log('page navigation with this theme is not supported'); + return; + } + if (num >= 0 && num < buttons.length) { + buttons[num].click(); + } else { + console.log('pageButtonClick(): invalid argument'); + } + } + } + + function scrollPageToSelected() { + var sel = $('.result[data-vim-selected]'); + if (sel.length !== 1) { + return; + } + + var wnd = $(window); + + var wtop = wnd.scrollTop(); + var etop = sel.offset().top; + + var offset = 30; + + if (wtop > etop) { + wnd.scrollTop(etop - offset); + } else { + var ebot = etop + sel.height(); + var wbot = wtop + 
wnd.height(); + + if (wbot < ebot) { + wnd.scrollTop(ebot - wnd.height() + offset); + } + } + } + + function scrollPage(amount) { + return function() { + window.scrollBy(0, amount); + highlightResult('visible')(); + } + } + + function scrollPageTo(position, nav) { + return function() { + window.scrollTo(0, position); + highlightResult(nav)(); + } + } + + function searchInputFocus() { + $('input#q').focus(); + } + + function openResult(newTab) { + return function() { + var link = $('.result[data-vim-selected] .result_header a'); + if (link.length) { + var url = link.attr('href'); + if (newTab) { + window.open(url); + } else { + window.location.href = url; + } + } + }; + } + + function toggleHelp() { + var helpPanel = $('#vim-hotkeys-help'); + if (helpPanel.length) { + helpPanel.toggleClass('hidden'); + return; + } + + var categories = {}; + + for (var k in vimKeys) { + var key = vimKeys[k]; + categories[key.cat] = categories[key.cat] || []; + categories[key.cat].push(key); + } + + var sorted = Object.keys(categories).sort(function(a, b) { + return categories[b].length - categories[a].length; + }); + + if (sorted.length === 0) { + return; + } + + var html = '<div id="vim-hotkeys-help" class="well vim-hotkeys-help">'; + html += '<div class="container-fluid">'; + + html += '<div class="row">'; + html += '<div class="col-sm-12">'; + html += '<h3>How to navigate searx with Vim-like hotkeys</h3>'; + html += '</div>'; // col-sm-12 + html += '</div>'; // row + + for (var i = 0; i < sorted.length; i++) { + var cat = categories[sorted[i]]; + + var lastCategory = i === (sorted.length - 1); + var first = i % 2 === 0; + + if (first) { + html += '<div class="row dflex">'; + } + html += '<div class="col-sm-' + (first && lastCategory ? 
12 : 6) + ' dflex">'; + + html += '<div class="panel panel-default iflex">'; + html += '<div class="panel-heading">' + cat[0].cat + '</div>'; + html += '<div class="panel-body">'; + html += '<ul class="list-unstyled">'; + + for (var cj in cat) { + html += '<li><kbd>' + cat[cj].key + '</kbd> ' + cat[cj].des + '</li>'; + } + + html += '</ul>'; + html += '</div>'; // panel-body + html += '</div>'; // panel + html += '</div>'; // col-sm-* + + if (!first || lastCategory) { + html += '</div>'; // row + } + } + + html += '</div>'; // container-fluid + html += '</div>'; // vim-hotkeys-help + + $('body').append(html); + } +}); diff --git a/searx/templates/courgette/preferences.html b/searx/templates/courgette/preferences.html index f89915d8d..ba4d0c650 100644 --- a/searx/templates/courgette/preferences.html +++ b/searx/templates/courgette/preferences.html @@ -109,7 +109,7 @@ <td>{{ search_engine.name }} ({{ shortcuts[search_engine.name] }})‎</td> <td>{{ _(categ) }}</td> <td class="engine_checkbox"> - <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in blocked_engines %} checked="checked"{% endif %} /> + <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in disabled_engines %} checked="checked"{% endif %} /> <label class="allow" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Allow') }}</label> <label class="deny" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Block') }}</label> </td> diff --git a/searx/templates/default/preferences.html b/searx/templates/default/preferences.html index 90006c029..a47dba458 100644 --- a/searx/templates/default/preferences.html +++ b/searx/templates/default/preferences.html 
@@ -97,7 +97,7 @@ <td>{{ search_engine.name }} ({{ shortcuts[search_engine.name] }})‎</td> <td>{{ _(categ) }}</td> <td class="engine_checkbox"> - <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in blocked_engines %} checked="checked"{% endif %} /> + <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in disabled_engines %} checked="checked"{% endif %} /> <label class="allow" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Allow') }}</label> <label class="deny" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Block') }}</label> </td> diff --git a/searx/templates/oscar/base.html b/searx/templates/oscar/base.html index 4813fffc2..f63025ecc 100644 --- a/searx/templates/oscar/base.html +++ b/searx/templates/oscar/base.html @@ -86,5 +86,8 @@ {% for script in scripts %} <script src="{{ url_for('static', filename=script) }}"></script> {% endfor %} + <script type="text/javascript"> + $(function() { $('a[data-toggle="modal"]').attr('href', '#'); }); + </script> </body> </html> diff --git a/searx/templates/oscar/infobox.html b/searx/templates/oscar/infobox.html index d87d98453..c72cfb638 100644 --- a/searx/templates/oscar/infobox.html +++ b/searx/templates/oscar/infobox.html @@ -1,8 +1,9 @@ <div class="panel panel-default infobox"> <div class="panel-heading"> - <h4 class="panel-title infobox_part">{{ infobox.infobox }}</h4> + <bdi><h4 class="panel-title infobox_part">{{ infobox.infobox }}</h4></bdi> </div> <div class="panel-body"> + <bdi> {% if infobox.img_src %}<img class="img-responsive center-block infobox_part" src="{{ image_proxify(infobox.img_src) }}" alt="{{ infobox.infobox }}" />{% endif %} {% if 
infobox.content %}<p class="infobox_part">{{ infobox.content }}</p>{% endif %} @@ -28,5 +29,6 @@ {% endfor %} </div> {% endif %} + </bdi> </div> </div> diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html index c677a0c66..a2c493a02 100644 --- a/searx/templates/oscar/preferences.html +++ b/searx/templates/oscar/preferences.html @@ -157,7 +157,7 @@ {% if not search_engine.private %} <tr> {% if not rtl %} - <td>{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in blocked_engines) }}</td> + <td>{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in disabled_engines) }}</td> <th>{{ search_engine.name }}</th> <td>{{ shortcuts[search_engine.name] }}</td> <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> @@ -169,7 +169,7 @@ <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> <td>{{ shortcuts[search_engine.name] }}</td> <th>{{ search_engine.name }}</th> - <td>{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in blocked_engines) }}</td> + <td>{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in disabled_engines) }}</td> {% endif %} </tr> {% endif %} diff --git a/searx/templates/oscar/result_templates/default.html b/searx/templates/oscar/result_templates/default.html index fc61b8ce0..f283693c3 100644 --- a/searx/templates/oscar/result_templates/default.html +++ b/searx/templates/oscar/result_templates/default.html @@ -13,7 +13,16 @@ </div>
{% endif %}
+{% if result.img_src %}
+<div class="container-fluid">
+ <div class="row">
+<img src="{{ image_proxify(result.img_src) }}" alt="{{ result.title|striptags }}" title="{{ result.title|striptags }}" style="width: auto; max-height: 60px; min-height: 60px;" class="col-xs-2 col-sm-4 col-md-4 result-content">
+{% if result.content %}<p class="result-content col-xs-8 col-sm-8 col-md-8">{{ result.content|safe }}</p>{% endif %}
+ </div>
+</div>
+{% else %}
{% if result.content %}<p class="result-content">{{ result.content|safe }}</p>{% endif %}
+{% endif %}
{% if rtl %}
{{ result_footer_rtl(result) }}
diff --git a/searx/templates/pix-art/preferences.html b/searx/templates/pix-art/preferences.html index f59497ec8..a4a6cd268 100644 --- a/searx/templates/pix-art/preferences.html +++ b/searx/templates/pix-art/preferences.html @@ -60,7 +60,7 @@ <tr> <td>{{ search_engine.name }} ({{ shortcuts[search_engine.name] }})‎</td> <td class="engine_checkbox"> - <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in blocked_engines %} checked="checked"{% endif %} /> + <input type="checkbox" id="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}" name="engine_{{ search_engine.name }}__{{ categ }}"{% if (search_engine.name, categ) in disabled_engines %} checked="checked"{% endif %} /> <label class="allow" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Allow') }}</label> <label class="deny" for="engine_{{ categ|replace(' ', '_') }}_{{ search_engine.name|replace(' ', '_') }}">{{ _('Block') }}</label> </td> diff --git a/searx/utils.py b/searx/utils.py index 506228465..b297582ef 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -230,26 +230,3 @@ def list_get(a_list, index, default=None): return a_list[index] else: return default - - -def get_blocked_engines(engines, cookies): - if 'blocked_engines' not in cookies: - return [(engine_name, category) for engine_name in engines - for category in engines[engine_name].categories if engines[engine_name].disabled] - - blocked_engine_strings = cookies.get('blocked_engines', '').split(',') - blocked_engines = [] - - if not blocked_engine_strings: - return blocked_engines - - for engine_string in blocked_engine_strings: - if engine_string.find('__') > -1: - engine, category = engine_string.split('__', 1) - if engine in engines and category in engines[engine].categories: - blocked_engines.append((engine, category)) - elif 
engine_string in engines: - for category in engines[engine_string].categories: - blocked_engines.append((engine_string, category)) - - return blocked_engines diff --git a/searx/webapp.py b/searx/webapp.py index d10f04e21..ed8ff1bd9 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -56,7 +56,7 @@ from searx.engines import ( from searx.utils import ( UnicodeWriter, highlight_content, html_to_text, get_themes, get_static_files, get_result_templates, gen_useragent, dict_subset, - prettify_url, get_blocked_engines + prettify_url ) from searx.version import VERSION_STRING from searx.languages import language_codes @@ -64,6 +64,7 @@ from searx.search import Search from searx.query import Query from searx.autocomplete import searx_bang, backends as autocomplete_backends from searx.plugins import plugins +from searx.preferences import Preferences # check if the pyopenssl, ndg-httpsclient, pyasn1 packages are installed. # They are needed for SSL connection without trouble, see #298 @@ -73,7 +74,7 @@ try: import pyasn1 # NOQA except ImportError: logger.critical("The pyopenssl, ndg-httpsclient, pyasn1 packages have to be installed.\n" - "Some HTTPS connections will failed") + "Some HTTPS connections will fail") static_path, templates_path, themes =\ @@ -109,8 +110,7 @@ for indice, theme in enumerate(themes): for (dirpath, dirnames, filenames) in os.walk(theme_img_path): global_favicons[indice].extend(filenames) -cookie_max_age = 60 * 60 * 24 * 365 * 5 # 5 years - +# used when translating category names _category_names = (gettext('files'), gettext('general'), gettext('music'), @@ -129,11 +129,8 @@ outgoing_proxies = settings['outgoing'].get('proxies', None) def get_locale(): locale = request.accept_languages.best_match(settings['locales'].keys()) - if settings['ui'].get('default_locale'): - locale = settings['ui']['default_locale'] - - if request.cookies.get('locale', '') in settings['locales']: - locale = request.cookies.get('locale', '') + if 
request.preferences.get_value('locale') != '': + locale = request.preferences.get_value('locale') if 'locale' in request.args\ and request.args['locale'] in settings['locales']: @@ -222,9 +219,7 @@ def get_current_theme_name(override=None): if override and override in themes: return override - theme_name = request.args.get('theme', - request.cookies.get('theme', - default_theme)) + theme_name = request.args.get('theme', request.preferences.get_value('theme')) if theme_name not in themes: theme_name = default_theme return theme_name @@ -251,7 +246,7 @@ def image_proxify(url): if url.startswith('//'): url = 'https:' + url - if not settings['server'].get('image_proxy') and not request.cookies.get('image_proxy'): + if not request.preferences.get_value('image_proxy'): return url hash_string = url + settings['server']['secret_key'] @@ -262,23 +257,18 @@ def image_proxify(url): def render(template_name, override_theme=None, **kwargs): - blocked_engines = get_blocked_engines(engines, request.cookies) - - autocomplete = request.cookies.get('autocomplete', settings['search']['autocomplete']) - - if autocomplete not in autocomplete_backends: - autocomplete = None + disabled_engines = request.preferences.engines.get_disabled() - nonblocked_categories = set(category for engine_name in engines - for category in engines[engine_name].categories - if (engine_name, category) not in blocked_engines) + enabled_categories = set(category for engine_name in engines + for category in engines[engine_name].categories + if (engine_name, category) not in disabled_engines) if 'categories' not in kwargs: kwargs['categories'] = ['general'] kwargs['categories'].extend(x for x in sorted(categories.keys()) if x != 'general' - and x in nonblocked_categories) + and x in enabled_categories) if 'all_categories' not in kwargs: kwargs['all_categories'] = ['general'] @@ -295,25 +285,24 @@ def render(template_name, override_theme=None, **kwargs): kwargs['selected_categories'].append(c) if not 
kwargs['selected_categories']: - cookie_categories = request.cookies.get('categories', '').split(',') + cookie_categories = request.preferences.get_value('categories') for ccateg in cookie_categories: - if ccateg in categories: - kwargs['selected_categories'].append(ccateg) + kwargs['selected_categories'].append(ccateg) if not kwargs['selected_categories']: kwargs['selected_categories'] = ['general'] if 'autocomplete' not in kwargs: - kwargs['autocomplete'] = autocomplete + kwargs['autocomplete'] = request.preferences.get_value('autocomplete') if get_locale() in rtl_locales and 'rtl' not in kwargs: kwargs['rtl'] = True kwargs['searx_version'] = VERSION_STRING - kwargs['method'] = request.cookies.get('method', 'POST') + kwargs['method'] = request.preferences.get_value('method') - kwargs['safesearch'] = request.cookies.get('safesearch', str(settings['search']['safe_search'])) + kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) # override url_for function in templates kwargs['url_for'] = url_for_theme @@ -347,14 +336,18 @@ def render(template_name, override_theme=None, **kwargs): @app.before_request def pre_request(): # merge GET, POST vars + preferences = Preferences(themes, categories.keys(), engines, plugins) + preferences.parse_cookies(request.cookies) + request.preferences = preferences + request.form = dict(request.form.items()) for k, v in request.args.items(): if k not in request.form: request.form[k] = v request.user_plugins = [] - allowed_plugins = request.cookies.get('allowed_plugins', '').split(',') - disabled_plugins = request.cookies.get('disabled_plugins', '').split(',') + allowed_plugins = preferences.plugins.get_enabled() + disabled_plugins = preferences.plugins.get_disabled() for plugin in plugins: if ((plugin.default_on and plugin.id not in disabled_plugins) or plugin.id in allowed_plugins): @@ -486,18 +479,18 @@ def autocompleter(): request_data = request.args # set blocked engines - blocked_engines = 
get_blocked_engines(engines, request.cookies) + disabled_engines = request.preferences.engines.get_disabled() # parse query - query = Query(request_data.get('q', '').encode('utf-8'), blocked_engines) + query = Query(request_data.get('q', '').encode('utf-8'), disabled_engines) query.parse_query() # check if search query is set if not query.getSearchQuery(): return '', 400 - # get autocompleter - completer = autocomplete_backends.get(request.cookies.get('autocomplete', settings['search']['autocomplete'])) + # run autocompleter + completer = autocomplete_backends.get(request.preferences.get_value('autocomplete')) # parse searx specific autocompleter results like !bang raw_results = searx_bang(query) @@ -505,7 +498,7 @@ def autocompleter(): # normal autocompletion results only appear if max 3 inner results returned if len(raw_results) <= 3 and completer: # get language from cookie - language = request.cookies.get('language') + language = request.preferences.get_value('language') if not language or language == 'all': language = 'en' else: @@ -532,117 +525,23 @@ def autocompleter(): @app.route('/preferences', methods=['GET', 'POST']) def preferences(): - """Render preferences page. 
- - Settings that are going to be saved as cookies.""" - lang = None - image_proxy = request.cookies.get('image_proxy', settings['server'].get('image_proxy')) - - if request.cookies.get('language')\ - and request.cookies['language'] in (x[0] for x in language_codes): - lang = request.cookies['language'] - - blocked_engines = [] - - resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index')))) - - if request.method == 'GET': - blocked_engines = get_blocked_engines(engines, request.cookies) - else: # on save - selected_categories = [] - post_disabled_plugins = [] - locale = None - autocomplete = '' - method = 'POST' - safesearch = settings['search']['safe_search'] - for pd_name, pd in request.form.items(): - if pd_name.startswith('category_'): - category = pd_name[9:] - if category not in categories: - continue - selected_categories.append(category) - elif pd_name == 'locale' and pd in settings['locales']: - locale = pd - elif pd_name == 'image_proxy': - image_proxy = pd - elif pd_name == 'autocomplete': - autocomplete = pd - elif pd_name == 'language' and (pd == 'all' or - pd in (x[0] for - x in language_codes)): - lang = pd - elif pd_name == 'method': - method = pd - elif pd_name == 'safesearch': - safesearch = pd - elif pd_name.startswith('engine_'): - if pd_name.find('__') > -1: - # TODO fix underscore vs space - engine_name, category = [x.replace('_', ' ') for x in - pd_name.replace('engine_', '', 1).split('__', 1)] - if engine_name in engines and category in engines[engine_name].categories: - blocked_engines.append((engine_name, category)) - elif pd_name == 'theme': - theme = pd if pd in themes else default_theme - elif pd_name.startswith('plugin_'): - plugin_id = pd_name.replace('plugin_', '', 1) - if not any(plugin.id == plugin_id for plugin in plugins): - continue - post_disabled_plugins.append(plugin_id) - else: - resp.set_cookie(pd_name, pd, max_age=cookie_max_age) - - disabled_plugins = [] - allowed_plugins = [] - for plugin in 
plugins: - if plugin.default_on: - if plugin.id in post_disabled_plugins: - disabled_plugins.append(plugin.id) - elif plugin.id not in post_disabled_plugins: - allowed_plugins.append(plugin.id) + """Render preferences page && save user preferences""" - resp.set_cookie('disabled_plugins', ','.join(disabled_plugins), max_age=cookie_max_age) - - resp.set_cookie('allowed_plugins', ','.join(allowed_plugins), max_age=cookie_max_age) - - resp.set_cookie( - 'blocked_engines', ','.join('__'.join(e) for e in blocked_engines), - max_age=cookie_max_age - ) - - if locale: - resp.set_cookie( - 'locale', locale, - max_age=cookie_max_age - ) - - if lang: - resp.set_cookie( - 'language', lang, - max_age=cookie_max_age - ) - - if selected_categories: - # cookie max age: 4 weeks - resp.set_cookie( - 'categories', ','.join(selected_categories), - max_age=cookie_max_age - ) - - resp.set_cookie( - 'autocomplete', autocomplete, - max_age=cookie_max_age - ) - - resp.set_cookie('method', method, max_age=cookie_max_age) - - resp.set_cookie('safesearch', str(safesearch), max_age=cookie_max_age) - - resp.set_cookie('image_proxy', image_proxy, max_age=cookie_max_age) - - resp.set_cookie('theme', theme, max_age=cookie_max_age) - - return resp + # save preferences + if request.method == 'POST': + resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index')))) + try: + request.preferences.parse_form(request.form) + except ValidationException: + # TODO use flash feature of flask + return resp + return request.preferences.save(resp) + + # render preferences + image_proxy = request.preferences.get_value('image_proxy') + lang = request.preferences.get_value('language') + disabled_engines = request.preferences.engines.get_disabled() + allowed_plugins = request.preferences.plugins.get_enabled() # stats for preferences page stats = {} @@ -664,17 +563,17 @@ def preferences(): return render('preferences.html', locales=settings['locales'], current_locale=get_locale(), - 
current_language=lang or 'all', + current_language=lang, image_proxy=image_proxy, language_codes=language_codes, engines_by_category=categories, stats=stats, - blocked_engines=blocked_engines, + disabled_engines=disabled_engines, autocomplete_backends=autocomplete_backends, shortcuts={y: x for x, y in engine_shortcuts.items()}, themes=themes, plugins=plugins, - allowed_plugins=[plugin.id for plugin in request.user_plugins], + allowed_plugins=allowed_plugins, theme=get_current_theme_name()) @@ -750,7 +649,7 @@ Disallow: /preferences def opensearch(): method = 'post' - if request.cookies.get('method', 'POST') == 'GET': + if request.preferences.get_value('method') == 'GET': method = 'get' # chrome/chromium only supports HTTP GET.... |