From 7bcdc27a2e2bf7700cd0b1515bcd5e41b40d0bea Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 3 Jan 2017 19:15:55 +0100 Subject: [fix] fix standalone_search.py --- utils/standalone_search.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'utils') diff --git a/utils/standalone_search.py b/utils/standalone_search.py index 7e9516f82..cad32eeca 100644 --- a/utils/standalone_search.py +++ b/utils/standalone_search.py @@ -13,10 +13,9 @@ request_params = default_request_params() # Possible params # request_params['headers']['User-Agent'] = '' # request_params['category'] = '' -# request_params['started'] = '' - request_params['pageno'] = 1 request_params['language'] = 'en_us' +request_params['time_range'] = '' params = google.request(argv[1], request_params) @@ -32,5 +31,5 @@ else: request_args['data'] = request_params['data'] resp = req(request_params['url'], **request_args) - +resp.search_params = request_params print(dumps(google.response(resp))) -- cgit v1.2.3 From 369ec017b76f65c8202581261165c9423631e1f0 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Wed, 4 Jan 2017 14:01:29 +0100 Subject: [enh] standalone_searx.py is a command line interface to searx with JSON output. 
--- utils/google_search.py | 35 ++++++++++++++++ utils/standalone_search.py | 35 ---------------- utils/standalone_searx.py | 101 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 35 deletions(-) create mode 100644 utils/google_search.py delete mode 100644 utils/standalone_search.py create mode 100755 utils/standalone_searx.py (limited to 'utils') diff --git a/utils/google_search.py b/utils/google_search.py new file mode 100644 index 000000000..cad32eeca --- /dev/null +++ b/utils/google_search.py @@ -0,0 +1,35 @@ +from sys import argv, exit + +if not len(argv) > 1: + print('search query required') + exit(1) + +import requests +from json import dumps +from searx.engines import google +from searx.search import default_request_params + +request_params = default_request_params() +# Possible params +# request_params['headers']['User-Agent'] = '' +# request_params['category'] = '' +request_params['pageno'] = 1 +request_params['language'] = 'en_us' +request_params['time_range'] = '' + +params = google.request(argv[1], request_params) + +request_args = dict( + headers=request_params['headers'], + cookies=request_params['cookies'], +) + +if request_params['method'] == 'GET': + req = requests.get +else: + req = requests.post + request_args['data'] = request_params['data'] + +resp = req(request_params['url'], **request_args) +resp.search_params = request_params +print(dumps(google.response(resp))) diff --git a/utils/standalone_search.py b/utils/standalone_search.py deleted file mode 100644 index cad32eeca..000000000 --- a/utils/standalone_search.py +++ /dev/null @@ -1,35 +0,0 @@ -from sys import argv, exit - -if not len(argv) > 1: - print('search query required') - exit(1) - -import requests -from json import dumps -from searx.engines import google -from searx.search import default_request_params - -request_params = default_request_params() -# Possible params -# request_params['headers']['User-Agent'] = '' -# request_params['category'] = '' 
-request_params['pageno'] = 1 -request_params['language'] = 'en_us' -request_params['time_range'] = '' - -params = google.request(argv[1], request_params) - -request_args = dict( - headers=request_params['headers'], - cookies=request_params['cookies'], -) - -if request_params['method'] == 'GET': - req = requests.get -else: - req = requests.post - request_args['data'] = request_params['data'] - -resp = req(request_params['url'], **request_args) -resp.search_params = request_params -print(dumps(google.response(resp))) diff --git a/utils/standalone_searx.py b/utils/standalone_searx.py new file mode 100755 index 000000000..b19df4bae --- /dev/null +++ b/utils/standalone_searx.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. 
+ +(C) 2016- by Alexandre Flament, +''' + +# set path +from sys import path +from os.path import realpath, dirname +path.append(realpath(dirname(realpath(__file__)) + '/../')) + +# initialization +from json import dumps +from searx import settings +import searx.query +import searx.search +import searx.engines +import searx.preferences +import argparse + +searx.engines.initialize_engines(settings['engines']) + +# command line parsing +parser = argparse.ArgumentParser(description='Standalone searx.') +parser.add_argument('query', type=str, + help='Text query') +parser.add_argument('--category', type=str, nargs='?', + choices=searx.engines.categories.keys(), + default='general', + help='Search category') +parser.add_argument('--lang', type=str, nargs='?',default='all', + help='Search language') +parser.add_argument('--pageno', type=int, nargs='?', default=1, + help='Page number starting from 1') +parser.add_argument('--safesearch', type=str, nargs='?', choices=['0', '1', '2'], default='0', + help='Safe content filter from none to strict') +parser.add_argument('--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], + help='Filter by time range') +args = parser.parse_args() + +# search results for the query +form = { + "q":args.query, + "categories":args.category.decode('utf-8'), + "pageno":str(args.pageno), + "language":args.lang, + "time_range":args.timerange +} +preferences = searx.preferences.Preferences(['oscar'], searx.engines.categories.keys(), searx.engines.engines, []) +preferences.key_value_settings['safesearch'].parse(args.safesearch) + +search_query = searx.search.get_search_query_from_webapp(preferences, form) +search = searx.search.Search(search_query) +result_container = search.search() + +# output +from datetime import datetime + +def no_parsed_url(results): + for result in results: + del result['parsed_url'] + return results + +def json_serial(obj): + """JSON serializer for objects not serializable by default json code""" + if 
isinstance(obj, datetime): + serial = obj.isoformat() + return serial + raise TypeError ("Type not serializable") + +result_container_json = { + "search": { + "q": search_query.query, + "pageno": search_query.pageno, + "lang": search_query.lang, + "safesearch": search_query.safesearch, + "timerange": search_query.time_range, + "engines": search_query.engines + }, + "results": no_parsed_url(result_container.get_ordered_results()), + "infoboxes": result_container.infoboxes, + "suggestions": list(result_container.suggestions), + "answers": list(result_container.answers), + "paging": result_container.paging, + "results_number": result_container.results_number() +} + +print(dumps(result_container_json, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8", default=json_serial)) -- cgit v1.2.3