diff options
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/currency_convert.py | 13 | ||||
| -rw-r--r-- | searx/engines/solidtorrents.py | 88 | ||||
| -rw-r--r-- | searx/engines/tineye.py | 103 | ||||
| -rw-r--r-- | searx/engines/www1x.py | 17 |
4 files changed, 183 insertions, 38 deletions
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 969688126..18ea6cb19 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - currency convert (DuckDuckGo) +# lint: pylint +"""Currency convert (DuckDuckGo) """ import json @@ -13,18 +13,19 @@ about = { "use_official_api": False, "require_api_key": False, "results": 'JSONP', + "description": "Service from DuckDuckGo.", } engine_type = 'online_currency' categories = [] -url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' +base_url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' weight = 100 https_support = True -def request(query, params): - params['url'] = url.format(params['from'], params['to']) +def request(_query, params): + params['url'] = base_url.format(params['from'], params['to']) return params @@ -34,7 +35,7 @@ def response(resp): results = [] try: conversion_rate = float(json.loads(json_resp)['conversion']['converted-amount']) - except: + except ValueError: return results answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format( resp.search_params['amount'], diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 614b38277..9b5d543d8 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -1,51 +1,89 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Solid Torrents - +"""SolidTorrents """ -from json import loads +from datetime import datetime from urllib.parse import urlencode +import random + +from lxml import html + +from searx.utils import ( + extract_text, + eval_xpath, + eval_xpath_getindex, + eval_xpath_list, + get_torrent_size, +) about = { "website": 'https://www.solidtorrents.net/', "wikidata_id": None, "official_api_documentation": None, - "use_official_api": True, + "use_official_api": False, "require_api_key": False, - "results": 'JSON', + "results": 'HTML', } 
categories = ['files'] paging = True -base_url = 'https://www.solidtorrents.net/' -search_url = base_url + 'api/v1/search?{query}' +# base_url can be overwritten by a list of URLs in the settings.yml +base_url = 'https://solidtorrents.net' def request(query, params): - skip = (params['pageno'] - 1) * 20 - query = urlencode({'q': query, 'skip': skip}) + if isinstance(base_url, list): + params['base_url'] = random.choice(base_url) + else: + params['base_url'] = base_url + search_url = params['base_url'] + '/search?{query}' + page = (params['pageno'] - 1) * 20 + query = urlencode({'q': query, 'page': page}) params['url'] = search_url.format(query=query) - logger.debug("query_url --> %s", params['url']) return params def response(resp): results = [] - search_results = loads(resp.text) - - for result in search_results["results"]: - results.append( - { - 'infohash': result["infohash"], - 'seed': result["swarm"]["seeders"], - 'leech': result["swarm"]["leechers"], - 'title': result["title"], - 'url': "https://solidtorrents.net/view/" + result["_id"], - 'filesize': result["size"], - 'magnetlink': result["magnet"], - 'template': "torrent.html", - } - ) + dom = html.fromstring(resp.text) + + for result in eval_xpath(dom, '//div[contains(@class, "search-result")]'): + a = eval_xpath_getindex(result, './div/h5/a', 0, None) + if a is None: + continue + title = extract_text(a) + url = eval_xpath_getindex(a, '@href', 0, None) + categ = eval_xpath(result, './div//a[contains(@class, "category")]') + metadata = extract_text(categ) + stats = eval_xpath_list(result, './div//div[contains(@class, "stats")]/div', min_len=5) + n, u = extract_text(stats[1]).split() + filesize = get_torrent_size(n, u) + leech = extract_text(stats[2]) + seed = extract_text(stats[3]) + torrentfile = eval_xpath_getindex(result, './div//a[contains(@class, "dl-torrent")]/@href', 0, None) + magnet = eval_xpath_getindex(result, './div//a[contains(@class, "dl-magnet")]/@href', 0, None) + + params = { + 'seed': seed, 
+ 'leech': leech, + 'title': title, + 'url': resp.search_params['base_url'] + url, + 'filesize': filesize, + 'magnetlink': magnet, + 'torrentfile': torrentfile, + 'metadata': metadata, + 'template': "torrent.html", + } + + date_str = extract_text(stats[4]) + + try: + params['publishedDate'] = datetime.strptime(date_str, '%b %d, %Y') + except ValueError: + pass + + results.append(params) + return results diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py new file mode 100644 index 000000000..fe5b60393 --- /dev/null +++ b/searx/engines/tineye.py @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""This engine implements *Tineye - reverse image search* + +Using TinEye, you can search by image or perform what we call a reverse image +search. You can do that by uploading an image or searching by URL. You can also +simply drag and drop your images to start your search. TinEye constantly crawls +the web and adds images to its index. Today, the TinEye index is over 50.2 +billion images `[tineye.com] <https://tineye.com/how>`_. + +.. hint:: + + This SearXNG engine only supports *'searching by URL'* and it does not use + the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_. 
+
+"""
+
+from urllib.parse import urlencode
+from datetime import datetime
+
+about = {
+    "website": 'https://tineye.com',
+    "wikidata_id": 'Q2382535',
+    "official_api_documentation": 'https://api.tineye.com/python/docs/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+engine_type = 'online_url_search'
+categories = ['general']
+paging = True
+safesearch = False
+base_url = 'https://tineye.com'
+search_string = '/result_json/?page={page}&{query}'
+
+
+def request(query, params):
+
+    if params['search_urls']['data:image']:
+        query = params['search_urls']['data:image']
+    elif params['search_urls']['http']:
+        query = params['search_urls']['http']
+
+    query = urlencode({'url': query})
+
+    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
+    params['url'] = base_url + search_string.format(query=query, page=params['pageno'])
+
+    params['headers'].update(
+        {
+            'Connection': 'keep-alive',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Host': 'tineye.com',
+            'DNT': '1',
+            'TE': 'trailers',
+        }
+    )
+    return params
+
+
+def response(resp):
+    results = []
+
+    # Define wanted results
+    json_data = resp.json()
+    number_of_results = json_data['num_matches']
+
+    for i in json_data['matches']:
+        image_format = i['format']
+        width = i['width']
+        height = i['height']
+        thumbnail_src = i['image_url']
+        backlink = i['domains'][0]['backlinks'][0]
+        url = backlink['backlink']
+        source = backlink['url']
+        title = backlink['image_name']
+        img_src = backlink['url']
+
+        # Get and convert published date
+        api_date = backlink['crawl_date'][:-3]
+        publishedDate = datetime.fromisoformat(api_date)
+
+        # Append results
+        results.append(
+            {
+                'template': 'images.html',
+                'url': url,
+                'thumbnail_src': thumbnail_src,
+                'source': source,
+                'title': title,
+                'img_src': img_src,
+                'format': image_format,
+                'width': width,
+                'height': height,
+                'publishedDate': publishedDate,
+            }
+        )
+
+    # Append number of results
+
results.append({'number_of_results': number_of_results}) + + return results diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index f6b82944d..a7ec06f18 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -1,10 +1,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - 1x (Images) +# lint: pylint +"""1x (Images) + """ -from lxml import html, etree from urllib.parse import urlencode, urljoin +from lxml import html, etree + from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex # about @@ -38,13 +40,14 @@ def request(query, params): def response(resp): results = [] xmldom = etree.fromstring(resp.content) - xmlsearchresult = eval_xpath_getindex(xmldom, '//searchresult', 0) + xmlsearchresult = eval_xpath_getindex(xmldom, '//data', 0) dom = html.fragment_fromstring(xmlsearchresult.text, create_parent='div') - for link in eval_xpath_list(dom, '/div/table/tr/td/div[2]//a'): + for link in eval_xpath_list(dom, '//a'): url = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) - thumbnail_src = urljoin(gallery_url, eval_xpath_getindex(link, './/img', 0).attrib['src']) - + thumbnail_src = urljoin( + gallery_url, (eval_xpath_getindex(link, './/img', 0).attrib['src']).replace(base_url, '') + ) # append result results.append( { |