diff options
| author | Michael Vieira <dtox94@gmail.com> | 2018-02-23 11:52:14 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-02-23 11:52:14 +0100 |
| commit | eb0abb0825dd781417cd59ebcf5892fe76ad0527 (patch) | |
| tree | 139f43277c0fffe31f460eb828db465cd02a90c0 /searx/engines | |
| parent | 0367c9ab48fd111d331c113bf7b74824275b51b7 (diff) | |
| parent | 9fea2060b53cea72d57c92fef66a591ec867aee8 (diff) | |
Merge branch 'master' into master
Diffstat (limited to 'searx/engines')
| -rw-r--r-- | searx/engines/__init__.py | 8 | ||||
| -rw-r--r-- | searx/engines/asksteem.py | 44 | ||||
| -rw-r--r-- | searx/engines/microsoft_academic.py | 75 |
3 files changed, 123 insertions, 4 deletions
# Reconstruction of the merge content (limited to searx/engines): the cgit
# text dump collapsed the whole patch onto a single line.  Below is the
# post-merge state of the touched code, re-formatted as readable Python.
#
# NOTE(review): three separate files are shown back-to-back.  If this text
# were executed as one module, the later `request`/`response` definitions
# would shadow the earlier ones; in the repository each section lives in its
# own file under searx/engines/.

# === searx/engines/__init__.py — initialize_engines (post-merge hunk) ========

def initialize_engines(engine_list):
    """Load the configured engines, then run each engine's optional ``init``
    hook on a background thread so web-app startup is not blocked.

    The pre-merge code iterated ``engines.items()`` into a single variable
    (making ``engine`` a (name, module) tuple) and read a stale
    ``engine_attr`` / ``engine_data`` — the merged version unpacks the pair
    and uses the literal attribute name.
    """
    load_engines(engine_list)

    for engine_name, engine in engines.items():
        if hasattr(engine, 'init'):
            init_fn = getattr(engine, 'init')

            # Bind the loop variables as defaults: the thread may not have
            # started running before the loop rebinds init_fn/engine_name,
            # and a late-binding closure would then init/log the wrong engine.
            def engine_init(engine_name=engine_name, init_fn=init_fn):
                init_fn()
                logger.debug('%s engine initialized', engine_name)

            logger.debug('Starting background initialization of %s engine', engine_name)
            threading.Thread(target=engine_init).start()


# === searx/engines/asksteem.py (new file) ====================================

"""
 Asksteem (general)

 @website     https://asksteem.com/
 @provide-api yes

 @using-api   yes
 @results     JSON (https://github.com/Hoxly/asksteem-docs/wiki)
 @stable      yes
 @parse       url, title, content
"""

from json import loads
from searx.url_utils import urlencode

# engine dependent config
categories = ['general']
paging = True
language_support = False
disabled = True

# search-url
search_url = 'https://api.asksteem.com/search?{params}'
result_url = 'https://steemit.com/@{author}/{title}'


# do search-request
def request(query, params):
    # 'pg' is asksteem's 1-based page parameter
    url = search_url.format(params=urlencode({'q': query, 'pg': params['pageno']}))
    params['url'] = url
    return params


# get response from search-request
def response(resp):
    json = loads(resp.text)

    results = []

    # each hit links back to the author's steemit post via its permlink
    for result in json.get('results', []):
        results.append({'url': result_url.format(author=result['author'], title=result['permlink']),
                        'title': result['title'],
                        'content': result['summary']})
    return results


# === searx/engines/microsoft_academic.py (new file) ==========================

"""
Microsoft Academic (Science)

@website     https://academic.microsoft.com
@provide-api yes
@using-api   no
@results     JSON
@stable      no
@parse       url, title, content
"""

from datetime import datetime
from json import loads
from uuid import uuid4

from searx.url_utils import urlencode
from searx.utils import html_to_text

categories = ['images']
paging = True
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'


def request(query, params):
    """Build the POST request the academic.microsoft.com internal API expects:
    fresh correlation/session UUIDs in the query string and cookies, plus a
    JSON-ish form body carrying the query and paging offset."""
    correlation_id = uuid4()
    msacademic = uuid4()
    time_now = datetime.now()

    params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
    params['cookies']['msacademic'] = str(msacademic)
    params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
    params['method'] = 'POST'
    params['data'] = {
        'Query': '@{query}@'.format(query=query),
        'Limit': 10,
        'Offset': params['pageno'] - 1,
        'Filters': '',
        'OrderBy': '',
        'SortAscending': False,
    }

    return params


def response(resp):
    """Parse the JSON result list; titles/abstracts arrive as HTML and are
    flattened to plain text."""
    results = []
    response_data = loads(resp.text)

    for result in response_data['results']:
        url = _get_url(result)
        title = result['e']['dn']
        content = _get_content(result)
        results.append({
            'url': url,
            'title': html_to_text(title),
            'content': html_to_text(content),
        })

    return results


def _get_url(result):
    # prefer the paper's own source link; fall back to its MA detail page
    if 's' in result['e']:
        return result['e']['s'][0]['u']
    return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])


def _get_content(result):
    # 'd' holds the abstract/description; truncate long ones for display
    if 'd' in result['e']:
        content = result['e']['d']
        if len(content) > 300:
            return content[:300] + '...'
        return content

    return ''