Merge remote-tracking branch 'asciimoo/master'

author: Thomas Pointhuber <thomas.pointhuber@gmx.at> 2014-03-04 15:06:27 +0100
committer: Thomas Pointhuber <thomas.pointhuber@gmx.at> 2014-03-04 15:06:27 +0100
commit: 07f83cab224d2ddf1f7fd8b544f2f2d6679c2416 (patch)
tree: 704746222d964c82213efaa75840f6f85e85cde9 /searx
parent: fe35c86c947f503ae2e1e7633d3355204e48ae12 (diff)
parent: 71c2e8222bc5d7115e8e2ed415057f66da3a2f09 (diff)
4 files changed, 8 insertions, 5 deletions
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index ca27a5b2f..935718609 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -6,7 +6,7 @@ from json import loads
 categories = ['news']
 
 url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
+search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
 
 paging = True
 language_support = True
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index f83b4b967..f070b8a7d 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -35,7 +35,7 @@ def response(resp):
 
     for result in dom.xpath(results_xpath):
         url_string = extract_url(result.xpath(url_xpath), search_url)
-        start = url_string.find('/RU=')+4
+        start = url_string.find('http', url_string.find('/RU=')+1)
         end = url_string.rfind('/RS')
         url = unquote(url_string[start:end])
         title = extract_text(result.xpath(title_xpath)[0])
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index 6ece496cd..3c257866c 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -35,7 +35,7 @@ def response(resp):
 
     for result in dom.xpath(results_xpath):
         url_string = extract_url(result.xpath(url_xpath), search_url)
-        start = url_string.find('/RU=')+4
+        start = url_string.find('http', url_string.find('/RU=')+1)
         end = url_string.rfind('/RS')
         url = unquote(url_string[start:end])
         title = extract_text(result.xpath(title_xpath)[0])
diff --git a/searx/utils.py b/searx/utils.py
index af8ce952e..b99a945df 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -4,12 +4,15 @@ import csv
 from codecs import getincrementalencoder
 import cStringIO
 import re
+from random import choice
 
+ua_versions = ('26.0', '27.0', '28.0')
+ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64; rv:26.0')
+ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"
 
 def gen_useragent():
     # TODO
-    ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
-    return ua
+    return ua.format(os=choice(ua_os), version=choice(ua_versions))
 
 
 def highlight_content(content, query):
author	Thomas Pointhuber <thomas.pointhuber@gmx.at>	2014-03-04 15:06:27 +0100
committer	Thomas Pointhuber <thomas.pointhuber@gmx.at>	2014-03-04 15:06:27 +0100
commit	07f83cab224d2ddf1f7fd8b544f2f2d6679c2416 (patch)
tree	704746222d964c82213efaa75840f6f85e85cde9 /searx
parent	fe35c86c947f503ae2e1e7633d3355204e48ae12 (diff)
parent	71c2e8222bc5d7115e8e2ed415057f66da3a2f09 (diff)