summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorDalf <alex@al-f.net>2014-09-22 23:39:21 +0200
committerDalf <alex@al-f.net>2014-09-22 23:39:21 +0200
commite39d9fe5423a0fceed1d15dc63c1f8aa30d72e44 (patch)
treec03b6f96be60db757e6823d9c8669bda1717afd9 /searx
parent6b058962e1f87a17ce2d9c2bcb4faa73df285df3 (diff)
update comment
Diffstat (limited to 'searx')
-rw-r--r--searx/search.py11
1 files changed, 6 insertions, 5 deletions
diff --git a/searx/search.py b/searx/search.py
index 10916cc50..48f8012f1 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -106,8 +106,13 @@ def score_results(results):
res['host'] = res['host'].replace('www.', '', 1)
res['engines'] = [res['engine']]
+
weight = 1.0
+ # collapse multiple spaces and remove newlines from content
+ if 'content' in res:
+ res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
+
# get weight of this engine if possible
if hasattr(engines[res['engine']], 'weight'):
weight = float(engines[res['engine']].weight)
@@ -115,12 +120,8 @@ def score_results(results):
# calculate score for that engine
score = int((flat_len - i) / engines_len) * weight + 1
- duplicated = False
-
# check for duplicates
- if 'content' in res:
- res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
-
+ duplicated = False
for new_res in results:
# remove / from the end of the url if required
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa