From a0a1284998946bdc446283552674263240b4fd0f Mon Sep 17 00:00:00 2001 From: marc Date: Mon, 6 Jun 2016 01:08:36 -0500 Subject: wikidata refactor and more attributes (see issue #560) --- tests/unit/engines/test_wikidata.py | 502 ++++++++++++++++++++++++++++++++++++ 1 file changed, 502 insertions(+) create mode 100644 tests/unit/engines/test_wikidata.py (limited to 'tests') diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py new file mode 100644 index 000000000..99d8540cf --- /dev/null +++ b/tests/unit/engines/test_wikidata.py @@ -0,0 +1,502 @@ +# -*- coding: utf-8 -*- +from json import loads +from lxml.html import fromstring +from collections import defaultdict +import mock +from searx.engines import wikidata +from searx.testing import SearxTestCase + + +class TestWikidataEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['language'] = 'all' + params = wikidata.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('wikidata.org', params['url']) + self.assertIn('en', params['url']) + + dicto['language'] = 'es_ES' + params = wikidata.request(query, dicto) + self.assertIn(query, params['url']) + self.assertIn('es', params['url']) + + # successful cases are not tested here to avoid sending additional requests + def test_response(self): + self.assertRaises(AttributeError, wikidata.response, None) + self.assertRaises(AttributeError, wikidata.response, []) + self.assertRaises(AttributeError, wikidata.response, '') + self.assertRaises(AttributeError, wikidata.response, '[]') + + response = mock.Mock(content='', search_params={"language": "all"}) + self.assertEqual(wikidata.response(response), []) + + def test_getDetail(self): + response = {} + results = wikidata.getDetail(response, "Q123", "en", "en-US") + self.assertEqual(results, []) + + title_html = '
Test
' + html = """ +
+
+
+
+ +
+
+ """ + response = {"parse": {"displaytitle": title_html, "text": html}} + + results = wikidata.getDetail(response, "Q123", "en", "en-US") + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['url'], 'https://en.wikipedia.org/wiki/Test') + + title_html = """ +
+
+ Test + English +
+
+ """ + html = """ +
+
+ Description + English +
+ +
+ +
+
+ """ + response = {"parse": {"displaytitle": title_html, "text": html}} + + results = wikidata.getDetail(response, "Q123", "yua", "yua_MX") + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['title'], 'official website') + self.assertEqual(results[0]['url'], 'https://officialsite.com') + + self.assertEqual(results[1]['infobox'], 'Test') + self.assertEqual(results[1]['id'], None) + self.assertEqual(results[1]['content'], 'Description') + self.assertEqual(results[1]['attributes'], []) + self.assertEqual(results[1]['urls'][0]['title'], 'official website') + self.assertEqual(results[1]['urls'][0]['url'], 'https://officialsite.com') + self.assertEqual(results[1]['urls'][1]['title'], 'Wikipedia (en)') + self.assertEqual(results[1]['urls'][1]['url'], 'https://en.wikipedia.org/wiki/Test') + + def test_add_image(self): + image_src = wikidata.add_image(fromstring("
")) + self.assertEqual(image_src, None) + + html = u""" +
+
+ +
+
+
+ +
+
+ +
+
+
+
+
+ """ + html_etree = fromstring(html) + + image_src = wikidata.add_image(html_etree) + self.assertEqual(image_src, "https://commons.wikimedia.org/wiki/Special:FilePath/image.png?width=500") + + html = u""" +
+
+ +
+
+
+ +
+
+ +
+
+
+
+
+ +
+
+
+ +
+
+ +
+
+
+
+
+ """ + html_etree = fromstring(html) + + image_src = wikidata.add_image(html_etree) + self.assertEqual(image_src, "https://commons.wikimedia.org/wiki/Special:FilePath/logo.png?width=500") + + def test_add_attribute(self): + html = u""" +
+
+ +
+
+
+ +
+ +
+
+
+
+ """ + attributes = [] + html_etree = fromstring(html) + + wikidata.add_attribute(attributes, html_etree, "Fail") + self.assertEqual(attributes, []) + + wikidata.add_attribute(attributes, html_etree, "P27") + self.assertEqual(len(attributes), 1) + self.assertEqual(attributes[0]["label"], "country of citizenship") + self.assertEqual(attributes[0]["value"], "United Kingdom") + + html = u""" +
+
+ +
+
+
+ +
+
+
+
+ 27 January 1832 + + Gregorian + +
+
+
+
+
+
+
+ """ + attributes = [] + html_etree = fromstring(html) + wikidata.add_attribute(attributes, html_etree, "P569", date=True) + self.assertEqual(len(attributes), 1) + self.assertEqual(attributes[0]["label"], "date of birth") + self.assertEqual(attributes[0]["value"], "27 January 1832") + + html = u""" +
+
+ +
+
+
+ +
+ +
+
+
+ +
+ +
+
+
+
+ """ + attributes = [] + html_etree = fromstring(html) + wikidata.add_attribute(attributes, html_etree, "P6") + self.assertEqual(len(attributes), 1) + self.assertEqual(attributes[0]["label"], "head of government") + self.assertEqual(attributes[0]["value"], "Old Prime Minister, Actual Prime Minister") + + attributes = [] + html_etree = fromstring(html) + wikidata.add_attribute(attributes, html_etree, "P6", trim=True) + self.assertEqual(len(attributes), 1) + self.assertEqual(attributes[0]["value"], "Actual Prime Minister") + + def test_add_url(self): + html = u""" +
+ +
+ """ + urls = [] + html_etree = fromstring(html) + wikidata.add_url(urls, html_etree, 'P856') + self.assertEquals(len(urls), 1) + self.assertIn({'title': 'official website', 'url': 'https://searx.me/'}, urls) + urls = [] + results = [] + wikidata.add_url(urls, html_etree, 'P856', 'custom label', results=results) + self.assertEquals(len(urls), 1) + self.assertEquals(len(results), 1) + self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, urls) + self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, results) + + html = u""" +
+ +
+ """ + urls = [] + html_etree = fromstring(html) + wikidata.add_url(urls, html_etree, 'P856') + self.assertEquals(len(urls), 2) + self.assertIn({'title': 'official website', 'url': 'http://www.worldofwarcraft.com'}, urls) + self.assertIn({'title': 'official website', 'url': 'http://eu.battle.net/wow/en/'}, urls) + + def test_get_imdblink(self): + html = u""" +
+
+
+ +
+
+
+ """ + html_etree = fromstring(html) + imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/') + + html = u""" +
+
+
+ +
+
+
+ """ + html_etree = fromstring(html) + imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/') + self.assertIn('https://www.imdb.com/name/nm4915994', imdblink) + + def test_get_geolink(self): + html = u""" +
+
+
+
+ 60°N, 40°E +
+
+
+
+ """ + html_etree = fromstring(html) + geolink = wikidata.get_geolink(html_etree) + self.assertIn('https://www.openstreetmap.org/', geolink) + self.assertIn('lat=60&lon=40', geolink) + + html = u""" +
+
+
+
+ 34°35'59"S, 58°22'55"W +
+
+
+
+ """ + html_etree = fromstring(html) + geolink = wikidata.get_geolink(html_etree) + self.assertIn('https://www.openstreetmap.org/', geolink) + self.assertIn('lat=-34.59', geolink) + self.assertIn('lon=-58.38', geolink) + + def test_get_wikilink(self): + html = """ +
+
+ +
+
+ +
+
+ """ + html_etree = fromstring(html) + wikilink = wikidata.get_wikilink(html_etree, 'nowiki') + self.assertEqual(wikilink, None) + wikilink = wikidata.get_wikilink(html_etree, 'enwiki') + self.assertEqual(wikilink, 'https://en.wikipedia.org/wiki/Test') + wikilink = wikidata.get_wikilink(html_etree, 'arwiki') + self.assertEqual(wikilink, 'https://ar.wikipedia.org/wiki/Test') + wikilink = wikidata.get_wikilink(html_etree, 'enwikiquote') + self.assertEqual(wikilink, 'https://en.wikiquote.org/wiki/Test') -- cgit v1.2.3 From ad58b14be7cc9a1e95858e150e9d8005734d9232 Mon Sep 17 00:00:00 2001 From: marc Date: Mon, 27 Jun 2016 23:35:43 -0500 Subject: [fix] merge infoboxes based on weight also minor changes in attributes and images from wikidata --- tests/unit/engines/test_wikidata.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'tests') diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py index 99d8540cf..ec5f52ef9 100644 --- a/tests/unit/engines/test_wikidata.py +++ b/tests/unit/engines/test_wikidata.py @@ -95,14 +95,14 @@ class TestWikidataEngine(SearxTestCase): results = wikidata.getDetail(response, "Q123", "yua", "yua_MX") self.assertEqual(len(results), 2) - self.assertEqual(results[0]['title'], 'official website') + self.assertEqual(results[0]['title'], 'Official website') self.assertEqual(results[0]['url'], 'https://officialsite.com') self.assertEqual(results[1]['infobox'], 'Test') self.assertEqual(results[1]['id'], None) self.assertEqual(results[1]['content'], 'Description') self.assertEqual(results[1]['attributes'], []) - self.assertEqual(results[1]['urls'][0]['title'], 'official website') + self.assertEqual(results[1]['urls'][0]['title'], 'Official website') self.assertEqual(results[1]['urls'][0]['url'], 'https://officialsite.com') self.assertEqual(results[1]['urls'][1]['title'], 'Wikipedia (en)') self.assertEqual(results[1]['urls'][1]['url'], 'https://en.wikipedia.org/wiki/Test') @@ -141,7 +141,8 @@ class TestWikidataEngine(SearxTestCase): html_etree = fromstring(html) image_src = wikidata.add_image(html_etree) - self.assertEqual(image_src, "https://commons.wikimedia.org/wiki/Special:FilePath/image.png?width=500") + self.assertEqual(image_src, + "https://commons.wikimedia.org/wiki/Special:FilePath/image.png?width=500&height=400") html = u"""
@@ -196,7 +197,8 @@ class TestWikidataEngine(SearxTestCase): html_etree = fromstring(html) image_src = wikidata.add_image(html_etree) - self.assertEqual(image_src, "https://commons.wikimedia.org/wiki/Special:FilePath/logo.png?width=500") + self.assertEqual(image_src, + "https://commons.wikimedia.org/wiki/Special:FilePath/logo.png?width=500&height=400") def test_add_attribute(self): html = u""" @@ -234,7 +236,7 @@ class TestWikidataEngine(SearxTestCase): wikidata.add_attribute(attributes, html_etree, "P27") self.assertEqual(len(attributes), 1) - self.assertEqual(attributes[0]["label"], "country of citizenship") + self.assertEqual(attributes[0]["label"], "Country of citizenship") self.assertEqual(attributes[0]["value"], "United Kingdom") html = u""" @@ -269,7 +271,7 @@ class TestWikidataEngine(SearxTestCase): html_etree = fromstring(html) wikidata.add_attribute(attributes, html_etree, "P569", date=True) self.assertEqual(len(attributes), 1) - self.assertEqual(attributes[0]["label"], "date of birth") + self.assertEqual(attributes[0]["label"], "Date of birth") self.assertEqual(attributes[0]["value"], "27 January 1832") html = u""" @@ -317,7 +319,7 @@ class TestWikidataEngine(SearxTestCase): html_etree = fromstring(html) wikidata.add_attribute(attributes, html_etree, "P6") self.assertEqual(len(attributes), 1) - self.assertEqual(attributes[0]["label"], "head of government") + self.assertEqual(attributes[0]["label"], "Head of government") self.assertEqual(attributes[0]["value"], "Old Prime Minister, Actual Prime Minister") attributes = [] @@ -355,7 +357,7 @@ class TestWikidataEngine(SearxTestCase): html_etree = fromstring(html) wikidata.add_url(urls, html_etree, 'P856') self.assertEquals(len(urls), 1) - self.assertIn({'title': 'official website', 'url': 'https://searx.me/'}, urls) + self.assertIn({'title': 'Official website', 'url': 'https://searx.me/'}, urls) urls = [] results = [] wikidata.add_url(urls, html_etree, 'P856', 'custom label', results=results) @@ -403,8 +405,8 @@ class TestWikidataEngine(SearxTestCase): html_etree = fromstring(html) wikidata.add_url(urls, html_etree, 'P856') self.assertEquals(len(urls), 2) - self.assertIn({'title': 'official website', 'url': 'http://www.worldofwarcraft.com'}, urls) - self.assertIn({'title': 'official website', 'url': 'http://eu.battle.net/wow/en/'}, urls) + self.assertIn({'title': 'Official website', 'url': 'http://www.worldofwarcraft.com'}, urls) + self.assertIn({'title': 'Official website', 'url': 'http://eu.battle.net/wow/en/'}, urls) def test_get_imdblink(self): html = u""" -- cgit v1.2.3