diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2014-09-02 22:00:30 +0200 |
|---|---|---|
| committer | Adam Tauber <asciimoo@gmail.com> | 2014-09-02 22:00:30 +0200 |
| commit | f825752145c3a94e078e7ba23a864e4ac37869f4 (patch) | |
| tree | 63e072174ffd8d588a5f54ac02c0db9ea50b257b /searx/engines/stackoverflow.py | |
| parent | f36d1e28fae212b8b8640324d2e787b73305e2d2 (diff) | |
| parent | 629a05e149eaaab05a724dd3915ed363c364c796 (diff) | |
Merge pull request #89 from pointhi/engines
update search engines and add comments to it
Diffstat (limited to 'searx/engines/stackoverflow.py')
| -rw-r--r-- | searx/engines/stackoverflow.py | 42 |
1 files changed, 35 insertions, 7 deletions
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index e24b309c1..edbe74a70 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -1,30 +1,58 @@
+## Stackoverflow (It)
+#
+# @website https://stackoverflow.com/
+# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
+#
+# @using-api no
+# @results HTML
+# @stable no (HTML can change)
+# @parse url, title, content
+
 from urlparse import urljoin
 from cgi import escape
 from urllib import urlencode
 from lxml import html
 
+# engine dependent config
 categories = ['it']
+paging = True
 
+# search-url
 url = 'http://stackoverflow.com/'
 search_url = url+'search?{query}&page={pageno}'
-result_xpath = './/div[@class="excerpt"]//text()'
 
-paging = True
+# specific xpath variables
+results_xpath = '//div[contains(@class,"question-summary")]'
+link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
+title_xpath = './/text()'
+content_xpath = './/div[@class="excerpt"]//text()'
 
 
+# do search-request
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       pageno=params['pageno'])
+
     return params
 
 
+# get response from search-request
 def response(resp):
     results = []
+
     dom = html.fromstring(resp.text)
-    for result in dom.xpath('//div[@class="question-summary search-result"]'):
-        link = result.xpath('.//div[@class="result-link"]//a')[0]
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        link = result.xpath(link_xpath)[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(' '.join(link.xpath('.//text()')))
-        content = escape(' '.join(result.xpath(result_xpath)))
-        results.append({'url': href, 'title': title, 'content': content})
+        title = escape(' '.join(link.xpath(title_xpath)))
+        content = escape(' '.join(result.xpath(content_xpath)))
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content})
+
+    # return results
     return results