summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 3
-rw-r--r-- searx/engines/__init__.py 2
-rw-r--r-- searx/engines/filecrop.py 71
-rw-r--r-- searx/engines/yacy.py 38
-rw-r--r-- searx/templates/about.html 2
-rw-r--r-- searx/webapp.py 3
6 files changed, 116 insertions, 3 deletions
diff --git a/.gitignore b/.gitignore
index 4cc20423c..76ae1ca2c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
env
engines.cfg
+
+*.pyc
+*/*.pyc
\ No newline at end of file
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 43ad1b52e..1ca11ff90 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -261,7 +261,7 @@ def get_engines_stats():
for engine in errors:
if max_errors:
- engine['percentage'] = int(engine['avg']/max_errors*100)
+ engine['percentage'] = int(float(engine['avg'])/max_errors*100)
else:
engine['percentage'] = 0
diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py
new file mode 100644
index 000000000..52426b84a
--- /dev/null
+++ b/searx/engines/filecrop.py
@@ -0,0 +1,71 @@
+from urllib import urlencode
+from HTMLParser import HTMLParser
+
# Site root. The trailing slash is kept because result links are built by
# concatenating this value with the href found in the result page.
url = 'http://www.filecrop.com/'
# Search endpoint template; ``{query}`` is replaced via str.format() with an
# already URL-encoded ``w=<terms>`` pair. ``url`` already ends in '/', so the
# path is appended without a second slash (previously this produced
# 'http://www.filecrop.com//search.php').
search_url = url + 'search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'
+
class FilecropResultParser(HTMLParser):
    """Collect search hits from a Filecrop result page.

    A hit starts at a ``<tr>`` whose ``bgcolor`` is one of the result row
    colours and ends after two ``</tr>`` tags have been seen. While a hit is
    being read:

    * the ``title`` attribute of a ``<label>`` becomes ``result['title']``;
    * the ``title`` attribute of an ``<a rel="nofollow" class="sourcelink">``
      is appended to ``result['content']``;
    * the ``href`` of any other ``<a>`` is joined to the module-level ``url``
      and stored as ``result['url']``;
    * every text node is appended to ``result['content']``.

    Tags that lack the expected attribute (or carry it without a value) are
    ignored instead of raising ``IndexError``/``TypeError``.

    Finished hits are available in ``self.results`` as a list of dicts.
    """

    # Background colours that mark the first row of a result.
    _RESULT_ROW_COLORS = ('#edeff5', '#ffffff')

    def __init__(self):
        HTMLParser.__init__(self)
        # True while the parser is inside the rows of one result.
        self._in_result = False

        self.results = []
        self.result = {}

        self.tr_counter = 0
        self.data_counter = 0

    def _first_attr(self, attrs, name):
        """Return the value of the first attribute called *name*, or None."""
        for key, value in attrs:
            if key == name:
                return value
        return None

    def _append_content(self, text):
        """Append *text* to the current result's content, creating it if needed."""
        self.result['content'] = self.result.get('content', '') + text

    def handle_starttag(self, tag, attrs):
        if tag == 'tr' and any(('bgcolor', color) in attrs
                               for color in self._RESULT_ROW_COLORS):
            self._in_result = True

        if not self._in_result:
            return

        if tag == 'label':
            title = self._first_attr(attrs, 'title')
            if title is not None:
                self.result['title'] = title
        elif tag == 'a':
            if ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs:
                source = self._first_attr(attrs, 'title')
                if source is not None:
                    self._append_content(source + ' ')
            else:
                href = self._first_attr(attrs, 'href')
                if href is not None:
                    self.result['url'] = url + href

    def handle_endtag(self, tag):
        if not self._in_result or tag != 'tr':
            return

        self.tr_counter += 1

        # A result spans two table rows; after the second one, store it and
        # reset the per-result state.
        if self.tr_counter == 2:
            self._in_result = False
            self.tr_counter = 0
            self.data_counter = 0
            self.results.append(self.result)
            self.result = {}

    def handle_data(self, data):
        if not self._in_result:
            return

        self._append_content(data + ' ')
        self.data_counter += 1
+
def request(query, params):
    """Store the Filecrop search URL for *query* in ``params['url']``.

    The search terms are URL-encoded as the ``w`` parameter and substituted
    into the ``search_url`` template. *params* is modified in place and
    returned.
    """
    encoded_terms = urlencode({'w': query})
    params['url'] = search_url.format(query=encoded_terms)
    return params
+
def response(resp):
    """Run ``resp.text`` through FilecropResultParser and return its results."""
    result_parser = FilecropResultParser()
    result_parser.feed(resp.text)
    return result_parser.results
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
new file mode 100644
index 000000000..c93ac522f
--- /dev/null
+++ b/searx/engines/yacy.py
@@ -0,0 +1,38 @@
+from json import loads
+from urllib import urlencode
+
# Base address of the search backend (a local instance on port 8090).
url = 'http://localhost:8090'
# Path and query-string template of the JSON search endpoint, relative to
# ``url``; ``{query}`` receives the URL-encoded ``query=<terms>`` pair.
search_url = '/yacysearch.json?{query}&maximumRecords=10'


def request(query, params):
    """Store the JSON search URL for *query* in ``params['url']``.

    *params* is modified in place and returned.
    """
    encoded_terms = urlencode({'query': query})
    endpoint = search_url.format(query=encoded_terms)
    params['url'] = url + endpoint
    return params
+
def response(resp):
    """Convert the JSON search response in ``resp.text`` into result dicts.

    Only the first entry of the top-level ``channels`` list is read. Each of
    its ``items`` becomes a dict with the keys ``title``, ``url`` (taken from
    the item's ``link``) and ``content``. ``content`` is assembled from the
    item's ``description`` and ``pubDate`` (each followed by ``<br/>`` when
    non-empty) and its ``sizename`` (only when ``size`` is not ``'-1'``).

    Returns an empty list when the payload is empty or has no channels or
    items. Missing or null optional fields are treated as empty rather than
    raising.

    Raises:
        KeyError: if an item has no ``title`` or ``link``.
    """
    raw_search_results = loads(resp.text)

    if not raw_search_results:
        return []

    # ``channels`` may be absent, null or an empty list; none of these should
    # be indexed.
    channels = raw_search_results.get('channels') or []
    if not channels:
        return []

    results = []

    for item in channels[0].get('items') or []:
        content = ''

        description = item.get('description')
        if description:
            content += description + "<br/>"

        pub_date = item.get('pubDate')
        if pub_date:
            content += pub_date + "<br/>"

        # '-1' is treated as "no size"; so is a missing ``size`` field.
        if item.get('size', '-1') != '-1':
            content += item.get('sizename') or ''

        results.append({
            'title': item['title'],
            'url': item['link'],
            'content': content,
        })

    return results
diff --git a/searx/templates/about.html b/searx/templates/about.html
index b90757914..14a0080f9 100644
--- a/searx/templates/about.html
+++ b/searx/templates/about.html
@@ -37,7 +37,7 @@
<p>It's ok if you don't trust us regarding the logs, <a href="https://github.com/asciimoo/searx">take the code</a> and run it yourself! decentralize!</p>
<h3>How to add to firefox?</h3>
<p><a href="#" onclick="window.external.AddSearchProvider(window.location.protocol + '//' + window.location.host + '/opensearch.xml')">Install</a> searx as a search engine on any version of Firefox! (javascript required)</p>
-<h2 id="faq">Developer FAQ</h2>
+<h2 id="dev_faq">Developer FAQ</h2>
<h3>New engines?</h3>
<p><ul>
<li>Edit your engines.cfg, see <a href="https://raw.github.com/asciimoo/searx/master/engines.cfg_sample">sample config</a></li>
diff --git a/searx/webapp.py b/searx/webapp.py
index b7e2a4674..2d48f2323 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -152,7 +152,8 @@ def preferences():
selected_categories.append(category)
if selected_categories:
resp = make_response(redirect('/'))
- resp.set_cookie('categories', ','.join(selected_categories))
+ # cookie max age: 4 weeks
+ resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4)
return resp
return render('preferences.html')