diff options
| author | marc <a01200356@itesm.mx> | 2016-06-27 23:35:43 -0500 |
|---|---|---|
| committer | marc <a01200356@itesm.mx> | 2016-08-05 23:51:04 -0500 |
| commit | ad58b14be7cc9a1e95858e150e9d8005734d9232 (patch) | |
| tree | a28f875b0c79183ddc572bd4a5aa3d93f30937f8 /searx | |
| parent | c2e40142879fcb08291471f0306a793fce63c124 (diff) | |
[fix] merge infoboxes based on weight
Also makes minor changes to the attributes and image selection in the wikidata engine.
Diffstat (limited to 'searx')
| -rw-r--r-- | searx/engines/wikidata.py | 24 | ||||
| -rw-r--r-- | searx/results.py | 18 | ||||
| -rw-r--r-- | searx/settings.yml | 2 |
3 files changed, 38 insertions, 6 deletions
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index f10fc13f4..91040e218 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -35,7 +35,7 @@ url_detail = wikidata_api\ url_map = 'https://www.openstreetmap.org/'\ + '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M' -url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500' +url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400' # xpaths wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title' @@ -162,6 +162,7 @@ def getDetail(jsonresponse, wikidata_id, language, locale): # INFOBOX ATTRIBUTES (ROWS) + # DATES # inception date add_attribute(attributes, result, 'P571', date=True) # dissolution date @@ -170,11 +171,14 @@ def getDetail(jsonresponse, wikidata_id, language, locale): add_attribute(attributes, result, 'P580', date=True) # end date add_attribute(attributes, result, 'P582', date=True) - # date of birth add_attribute(attributes, result, 'P569', date=True) # date of death add_attribute(attributes, result, 'P570', date=True) + # date of spacecraft launch + add_attribute(attributes, result, 'P619', date=True) + # date of spacecraft landing + add_attribute(attributes, result, 'P620', date=True) # nationality add_attribute(attributes, result, 'P27') @@ -201,7 +205,7 @@ def getDetail(jsonresponse, wikidata_id, language, locale): # area add_attribute(attributes, result, 'P2046') # currency - add_attribute(attributes, result, 'P38') + add_attribute(attributes, result, 'P38', trim=True) # heigth (building) add_attribute(attributes, result, 'P2048') @@ -230,6 +234,10 @@ def getDetail(jsonresponse, wikidata_id, language, locale): add_attribute(attributes, result, 'P264') # publisher add_attribute(attributes, result, 'P123') + # original network + add_attribute(attributes, result, 'P449') + # distributor + add_attribute(attributes, result, 'P750') # composer add_attribute(attributes, 
result, 'P86') # publication date @@ -266,6 +274,10 @@ def getDetail(jsonresponse, wikidata_id, language, locale): add_attribute(attributes, result, 'P112') # legal form (company/organization) add_attribute(attributes, result, 'P1454') + # operator + add_attribute(attributes, result, 'P137') + # crew members (tripulation) + add_attribute(attributes, result, 'P1029') # taxon add_attribute(attributes, result, 'P225') # chemical formula @@ -300,8 +312,8 @@ def getDetail(jsonresponse, wikidata_id, language, locale): # only returns first match def add_image(result): - # P18: image, P154: logo, P242: map, P41: flag, P2716: collage, P2910: icon - property_ids = ['P18', 'P154', 'P242', 'P41', 'P2716', 'P2910'] + # P15: route map, P242: locator map, P154: logo, P18: image, P242: map, P41: flag, P2716: collage, P2910: icon + property_ids = ['P15', 'P242', 'P154', 'P18', 'P242', 'P41', 'P2716', 'P2910'] for property_id in property_ids: image = result.xpath(property_xpath.replace('{propertyid}', property_id)) @@ -320,6 +332,7 @@ def add_attribute(attributes, result, property_id, default_label=None, date=Fals label = default_label else: label = extract_text(attribute[0].xpath(label_xpath)) + label = label[0].upper() + label[1:] if date: trim = True @@ -369,6 +382,7 @@ def add_url(urls, result, property_id=None, default_label=None, url_prefix=None, dom_element = dom_element[0] if not default_label: label = extract_text(dom_element.xpath(label_xpath)) + label = label[0].upper() + label[1:] if link_type == 'geo': links.append(get_geolink(dom_element)) diff --git a/searx/results.py b/searx/results.py index bf4067b41..9a4ec0b28 100644 --- a/searx/results.py +++ b/searx/results.py @@ -43,6 +43,19 @@ def compare_urls(url_a, url_b): def merge_two_infoboxes(infobox1, infobox2): + # get engines weights + if hasattr(engines[infobox1['engine']], 'weight'): + weight1 = engines[infobox1['engine']].weight + else: + weight1 = 1 + if hasattr(engines[infobox2['engine']], 'weight'): + weight2 = 
engines[infobox2['engine']].weight + else: + weight2 = 1 + + if weight2 > weight1: + infobox1['engine'] = infobox2['engine'] + if 'urls' in infobox2: urls1 = infobox1.get('urls', None) if urls1 is None: @@ -64,6 +77,8 @@ def merge_two_infoboxes(infobox1, infobox2): img2 = infobox2.get('img_src') if img1 is None: infobox1['img_src'] = img2 + elif weight2 > weight1: + infobox1['img_src'] = img2 if 'attributes' in infobox2: attributes1 = infobox1.get('attributes', None) @@ -77,7 +92,8 @@ def merge_two_infoboxes(infobox1, infobox2): attributeSet.add(attribute.get('label', None)) for attribute in infobox2.get('attributes', []): - attributes1.append(attribute) + if attribute.get('label', None) not in attributeSet: + attributes1.append(attribute) if 'content' in infobox2: content1 = infobox1.get('content', None) diff --git a/searx/settings.yml b/searx/settings.yml index 34393e7c1..38e9f4752 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -105,6 +105,7 @@ engines: - name : ddg definitions engine : duckduckgo_definitions shortcut : ddd + weight : 2 disabled : True - name : digg @@ -127,6 +128,7 @@ engines: - name : wikidata engine : wikidata shortcut : wd + weight : 2 - name : duckduckgo engine : duckduckgo |