From 3aa3a4633f50fa50693636113a4141e266db90d7 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 3 Jan 2015 23:55:50 +0100 Subject: Few fixes on Vimeo Change URL from https to http Change way of handling text xpath --- searx/engines/vimeo.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'searx/engines/vimeo.py') diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index c66c4148a..3949a7299 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -13,24 +13,23 @@ # @todo set content-parameter with correct data from urllib import urlencode -from HTMLParser import HTMLParser from lxml import html -from searx.engines.xpath import extract_text from dateutil import parser +from cgi import escape # engine dependent config categories = ['videos'] paging = True # search-url -base_url = 'https://vimeo.com' +base_url = 'http://vimeo.com' search_url = base_url + '/search/page:{pageno}?{query}' # specific xpath variables +results_xpath = '//div[@id="browse_content"]/ol/li' url_xpath = './a/@href' +title_xpath = './a/div[@class="data"]/p[@class="title"]' content_xpath = './a/img/@src' -title_xpath = './a/div[@class="data"]/p[@class="title"]/text()' -results_xpath = '//div[@id="browse_content"]/ol/li' publishedDate_xpath = './/p[@class="meta"]//attribute::datetime' @@ -39,10 +38,6 @@ def request(query, params): params['url'] = search_url.format(pageno=params['pageno'], query=urlencode({'q': query})) - # TODO required? 
- params['cookies']['__utma'] =\ - '00000000.000#0000000.0000000000.0000000000.0000000000.0' - return params @@ -52,15 +47,12 @@ def response(resp): dom = html.fromstring(resp.text) - p = HTMLParser() - # parse results for result in dom.xpath(results_xpath): url = base_url + result.xpath(url_xpath)[0] - title = p.unescape(extract_text(result.xpath(title_xpath))) - thumbnail = extract_text(result.xpath(content_xpath)[0]) - publishedDate = parser.parse(extract_text( - result.xpath(publishedDate_xpath)[0])) + title = escape(html.tostring(result.xpath(title_xpath)[0], method='text', encoding='UTF-8').decode("utf-8")) + thumbnail = result.xpath(content_xpath)[0] + publishedDate = parser.parse(result.xpath(publishedDate_xpath)[0]) # append result results.append({'url': url, -- cgit v1.2.3 From 4a195e0b28fdd940e046c442032c816095416fec Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 5 Jan 2015 02:04:23 +0100 Subject: Integrated media in results + Deezer Engine New "embedded" item for the results, which allows an engine to provide an iframe that displays the media directly in the results. Note that the src attribute of the iframes is not set; data-src is set instead, so that the iframe is only loaded when clicked. Deezer engine based on the public API (no key). 
--- searx/engines/vimeo.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'searx/engines/vimeo.py') diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index c66c4148a..fd945b319 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -7,7 +7,7 @@ # @using-api no (TODO, rewrite to api) # @results HTML (using search portal) # @stable no (HTML can change) -# @parse url, title, publishedDate, thumbnail +# @parse url, title, publishedDate, thumbnail, embedded # # @todo rewrite to api # @todo set content-parameter with correct data @@ -33,6 +33,10 @@ title_xpath = './a/div[@class="data"]/p[@class="title"]/text()' results_xpath = '//div[@id="browse_content"]/ol/li' publishedDate_xpath = './/p[@class="meta"]//attribute::datetime' +embedded_url = '' + # do search-request def request(query, params): @@ -56,11 +60,13 @@ def response(resp): # parse results for result in dom.xpath(results_xpath): - url = base_url + result.xpath(url_xpath)[0] + videoid = result.xpath(url_xpath)[0] + url = base_url + videoid title = p.unescape(extract_text(result.xpath(title_xpath))) thumbnail = extract_text(result.xpath(content_xpath)[0]) publishedDate = parser.parse(extract_text( result.xpath(publishedDate_xpath)[0])) + embedded = embedded_url.format(videoid=videoid) # append result results.append({'url': url, @@ -68,6 +74,7 @@ def response(resp): 'content': '', 'template': 'videos.html', 'publishedDate': publishedDate, + 'embedded': embedded, 'thumbnail': thumbnail}) # return results -- cgit v1.2.3