diff options
| author | Adam Tauber <asciimoo@gmail.com> | 2014-09-07 19:12:05 +0200 |
|---|---|---|
| committer | Adam Tauber <asciimoo@gmail.com> | 2014-09-07 19:12:05 +0200 |
| commit | 1e99cf2a0e541a3d2df0104d64fadf955bbccc20 (patch) | |
| tree | a7cb02ab1e19e0f7304b884d88cf838d5a184d1c /searx/engines/bing_news.py | |
| parent | c23db1b2bfb2c9233816fc378927c49b67eeffaf (diff) | |
| parent | a4ffeddce1bc56b0faa548e0485ccd6374c4e9d1 (diff) | |
Merge pull request #93 from dalf/master
yahoo, bing_news and dailymotion fixes
Diffstat (limited to 'searx/engines/bing_news.py')
| -rw-r--r-- | searx/engines/bing_news.py | 23 |
1 files changed, 18 insertions, 5 deletions
```diff
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 6c99c35dc..279f0d698 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -56,10 +56,14 @@ def response(resp):
         link = result.xpath('.//div[@class="newstitle"]/a')[0]
         url = link.attrib.get('href')
         title = ' '.join(link.xpath('.//text()'))
-        content = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')))
-
+        contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')
+        if contentXPath != None:
+            content = escape(' '.join(contentXPath))
+
         # parse publishedDate
-        publishedDate = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_ST"]//span[@class="sn_tm"]//text()')))
+        publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()')
+        if publishedDateXPath != None:
+            publishedDate = escape(' '.join(publishedDateXPath))
 
         if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
             timeNumbers = re.findall(r'\d+', publishedDate)
@@ -74,9 +78,18 @@ def response(resp):
             publishedDate = datetime.now()\
                 - timedelta(hours=int(timeNumbers[0]))\
                 - timedelta(minutes=int(timeNumbers[1]))
+        elif re.match("^[0-9]+ day(s|) ago$", publishedDate):
+            timeNumbers = re.findall(r'\d+', publishedDate)
+            publishedDate = datetime.now()\
+                - timedelta(days=int(timeNumbers[0]))
         else:
-            publishedDate = parser.parse(publishedDate)
-
+            try:
+                # FIXME use params['language'] to parse either mm/dd or dd/mm
+                publishedDate = parser.parse(publishedDate, dayfirst=False)
+            except TypeError:
+                # FIXME
+                publishedDate = datetime.now()
+
         # append result
         results.append({'url': url,
                         'title': title,
```