diff options
| author | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2014-09-02 18:49:42 +0200 |
|---|---|---|
| committer | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2014-09-02 18:49:42 +0200 |
| commit | a46bbb40422564b5576b81c978fb734dbf45a9ce (patch) | |
| tree | 7c78ecdb5b15999f53ad1ba82413e49469eecd86 | |
| parent | 80f98d60413c742d603da8eae3596999942ae77a (diff) | |
fix stackoverflow and add comments
| -rw-r--r-- | searx/engines/stackoverflow.py | 42 | ||||
| -rw-r--r-- | searx/settings.yml | 1 |
2 files changed, 35 insertions, 8 deletions
```diff
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index e24b309c1..edbe74a70 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -1,30 +1,58 @@
+## Stackoverflow (It)
+#
+# @website https://stackoverflow.com/
+# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
+#
+# @using-api no
+# @results HTML
+# @stable no (HTML can change)
+# @parse url, title, content
+
 from urlparse import urljoin
 from cgi import escape
 from urllib import urlencode
 from lxml import html
 
+# engine dependent config
 categories = ['it']
+paging = True
 
+# search-url
 url = 'http://stackoverflow.com/'
 search_url = url+'search?{query}&page={pageno}'
-result_xpath = './/div[@class="excerpt"]//text()'
 
-paging = True
+# specific xpath variables
+results_xpath = '//div[contains(@class,"question-summary")]'
+link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
+title_xpath = './/text()'
+content_xpath = './/div[@class="excerpt"]//text()'
 
 
+# do search-request
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       pageno=params['pageno'])
+
     return params
 
 
+# get response from search-request
 def response(resp):
     results = []
+
     dom = html.fromstring(resp.text)
-    for result in dom.xpath('//div[@class="question-summary search-result"]'):
-        link = result.xpath('.//div[@class="result-link"]//a')[0]
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        link = result.xpath(link_xpath)[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(' '.join(link.xpath('.//text()')))
-        content = escape(' '.join(result.xpath(result_xpath)))
-        results.append({'url': href, 'title': title, 'content': content})
+        title = escape(' '.join(link.xpath(title_xpath)))
+        content = escape(' '.join(result.xpath(content_xpath)))
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content})
+
+    # return results
     return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 00ea2c339..a08a15403 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -90,7 +90,6 @@ engines:
 
   - name : stackoverflow
     engine : stackoverflow
-    categories : it
     shortcut : st
 
   - name : startpage
```