diff options
| author | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2014-09-02 20:14:52 +0200 |
|---|---|---|
| committer | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2014-09-02 20:14:52 +0200 |
| commit | 9460750feab250d383080342a7bb0a5fe2e2392d (patch) | |
| tree | 000665ba10a830ab330a45e390e755b870f235f2 | |
| parent | 678a80f043d2f57f059236b574cc29fab4f70fe8 (diff) | |
fix twitter engine and add comments
* add language-support
* add comments
* little refactoring
| -rw-r--r-- | searx/engines/twitter.py | 37 |
1 file changed, 35 insertions, 2 deletions
```diff
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index c05c20fc2..8de78144e 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -1,30 +1,63 @@
+## Twitter (Social media)
+#
+# @website https://www.bing.com/news
+# @provide-api yes (https://dev.twitter.com/docs/using-search)
+#
+# @using-api no
+# @results HTML (using search portal)
+# @stable no (HTML can change)
+# @parse url, title, content
+#
+# @todo publishedDate
+
 from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
 from cgi import escape
 
+# engine dependent config
 categories = ['social media']
+language_support = True
 
+# search-url
 base_url = 'https://twitter.com/'
 search_url = base_url+'search?'
+
+# specific xpath variables
+results_xpath = '//li[@data-item-type="tweet"]'
+link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
 
 
+# do search-request
 def request(query, params):
     params['url'] = search_url + urlencode({'q': query})
+
+    # set language if specified
+    if params['language'] != 'all':
+        params['cookies']['lang'] = params['language'].split('_')[0]
+
     return params
 
 
+# get response from search-request
 def response(resp):
     results = []
+
     dom = html.fromstring(resp.text)
-    for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
-        link = tweet.xpath('.//small[@class="time"]//a')[0]
+
+    # parse results
+    for tweet in dom.xpath(results_xpath):
+        link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
         content = escape(''.join(tweet.xpath(content_xpath)))
+
+        # append result
         results.append({'url': url,
                         'title': title,
                         'content': content})
+
+    # return results
     return results
```