[ckan-changes] commit/ckan: dread: [misc, tests]: #1180 Improve filtering for links in markdown.
Bitbucket
commits-noreply at bitbucket.org
Wed Jun 8 14:17:24 UTC 2011
1 new changeset in ckan:
http://bitbucket.org/okfn/ckan/changeset/0fbfb5e368c0/
changeset: 0fbfb5e368c0
branch: release-v1.4.1
user: dread
date: 2011-06-08 16:16:42
summary: [misc,tests]: #1180 Improve filtering for links in markdown.
affected #: 5 files (3.3 KB)
--- a/ckan/misc.py Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/misc.py Wed Jun 08 15:16:42 2011 +0100
@@ -16,18 +16,27 @@
internal_link = re.compile('(package|tag|group):([a-z0-9\-_]+)')
normal_link = re.compile('<(http:[^>]+)>')
- html_whitelist = 'a b center li ol p table td tr ul'.split(' ')
+ html_whitelist = 'b center li ol p table td tr ul'.split(' ')
whitelist_elem = re.compile(r'<(\/?(%s)[^>]*)>' % "|".join(html_whitelist), re.IGNORECASE)
whitelist_escp = re.compile(r'\\xfc\\xfd(\/?(%s)[^>]*?)\\xfd\\xfc' % "|".join(html_whitelist), re.IGNORECASE)
- html_link = re.compile(r'<a href="([^"]*)">')
+ normal_link = re.compile(r'<a[^>]*?href="([^"]*?)"[^>]*?>', re.IGNORECASE)
+ abbrev_link = re.compile(r'<(http://[^>]*)>', re.IGNORECASE)
+ any_link = re.compile(r'<a[^>]*?>', re.IGNORECASE)
+ close_link = re.compile(r'<(\/a[^>]*)>', re.IGNORECASE)
+ link_escp = re.compile(r'\\xfc\\xfd(\/?(%s)[^>]*?)\\xfd\\xfc' % "|".join(['a']), re.IGNORECASE)
def to_html(self, text):
if text is None:
return ''
-
# Encode whitelist elements.
text = self.whitelist_elem.sub(r'\\\\xfc\\\\xfd\1\\\\xfd\\\\xfc', text)
+ # Encode links only in an acceptable format (guard against spammers)
+ text = self.normal_link.sub(r'\\\\xfc\\\\xfda href="\1" target="_blank" rel="nofollow"\\\\xfd\\\\xfc', text)
+ text = self.abbrev_link.sub(r'\\\\xfc\\\\xfda href="\1" target="_blank" rel="nofollow"\\\\xfd\\\\xfc\1</a>', text)
+ text = self.any_link.sub(r'\\\\xfc\\\\xfda href="TAG MALFORMED" target="_blank" rel="nofollow"\\\\xfd\\\\xfc', text)
+ text = self.close_link.sub(r'\\\\xfc\\\\xfd\1\\\\xfd\\\\xfc', text)
+
# Convert internal links.
text = self.internal_link.sub(r'[\1:\2] (/\1/\2)', text)
@@ -42,8 +51,6 @@
# Decode whitelist elements.
text = self.whitelist_escp.sub(r'<\1>', text)
-
- # Make links safer.
- text = self.html_link.sub(r'<a href="\1" target="_blank" rel="nofollow">', text)
+ text = self.link_escp.sub(r'<\1>', text)
return text
--- a/ckan/tests/functional/test_package.py Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/tests/functional/test_package.py Wed Jun 08 15:16:42 2011 +0100
@@ -1383,7 +1383,7 @@
self.body = str(self.res)
self.assert_fragment('<table width="100%" border="1">')
self.assert_fragment('<td rowspan="2"><b>Description</b></td>')
- self.assert_fragment('<a href="http://www.nber.org/patents/subcategories.txt">subcategory.txt</a>')
+ self.assert_fragment('<a href="http://www.nber.org/patents/subcategories.txt" target="_blank" rel="nofollow">subcategory.txt</a>')
self.assert_fragment('<td colspan="2"><center>--</center></td>')
self.fail_if_fragment('<script>')
--- a/ckan/tests/functional/test_user.py Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/tests/functional/test_user.py Wed Jun 08 15:16:42 2011 +0100
@@ -8,9 +8,6 @@
class TestUserController(FunctionalTestCase, HtmlCheckMethods):
@classmethod
def setup_class(self):
- model.repo.init_db()
- model.repo.rebuild_db()
- model.repo.init_db()
CreateTestData.create()
# make 3 changes, authored by annafan
@@ -21,6 +18,10 @@
rev.author = u'annafan'
model.repo.commit_and_remove()
+ CreateTestData.create_user('unfinisher', about='<a href="http://unfinished.tag')
+ CreateTestData.create_user('uncloser', about='<a href="http://unclosed.tag">')
+ CreateTestData.create_user('spammer', about=u'<a href="http://mysite">mysite</a><a href=\u201dhttp://test2\u201d>test2</a>')
+
@classmethod
def teardown_class(self):
model.repo.rebuild_db()
@@ -65,6 +66,39 @@
assert 'My Account' in main_res, main_res
assert 'Edit' in main_res, main_res
+ def test_user_read_about_unfinished(self):
+ user = model.User.by_name(u'unfinisher')
+ offset = '/user/%s' % user.id
+ res = self.app.get(offset, status=200)
+ main_res = self.main_div(res)
+ assert 'unfinisher' in res, res
+ assert '<a href="http://unfinished.tag' in main_res, main_res
+
+ def test_user_read_about_unclosed(self):
+ user = model.User.by_name(u'uncloser')
+ offset = '/user/%s' % user.id
+ res = self.app.get(offset, status=200)
+ main_res = self.main_div(res)
+ assert 'unclosed' in res, res
+ # tag gets closed by genshi
+ assert '<a href="http://unclosed.tag" target="_blank" rel="nofollow">\n</a>' in main_res, main_res
+
+ def test_user_read_about_spam(self):
+ user = model.User.by_name(u'spammer')
+ offset = '/user/%s' % user.id
+ res = self.app.get(offset, status=200)
+ main_res = self.main_div(res)
+ assert 'spammer' in res, res
+ self.check_named_element(res, 'a',
+ 'href="http://mysite"',
+ 'target="_blank"',
+ 'rel="nofollow"')
+
+ self.check_named_element(res, 'a',
+ 'href="TAG MALFORMED"',
+ 'target="_blank"',
+ 'rel="nofollow"')
+
def test_user_login(self):
offset = url_for(controller='user', action='login', id=None)
res = self.app.get(offset, status=200)
--- a/ckan/tests/misc/test_format_text.py Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/tests/misc/test_format_text.py Wed Jun 08 15:16:42 2011 +0100
@@ -34,15 +34,37 @@
def test_internal_link(self):
instr = 'package:test-_pkg'
- exp = '<a href="/package/test-_pkg" target="_blank" rel="nofollow">package:test-_pkg</a>'
+ exp = '<a href="/package/test-_pkg">package:test-_pkg</a>'
format = MarkdownFormat()
out = format.to_html(instr)
assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
def test_normal_link(self):
- instr = '<http:/somelink/>'
- exp = '<a href="http:/somelink/" target="_blank" rel="nofollow">http:/somelink/</a>'
+ instr = '<http://somelink/>'
+ exp = '<a href="http://somelink/" target="_blank" rel="nofollow">http://somelink/</a>'
format = MarkdownFormat()
out = format.to_html(instr)
assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
+ def test_malformed_link_1(self):
+ instr = u'<a href=\u201dsomelink\u201d>somelink</a>'
+ exp = '<a href="TAG MALFORMED" target="_blank" rel="nofollow">somelink</a>'
+ format = MarkdownFormat()
+ out = format.to_html(instr)
+ assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
+
+ def test_malformed_link_2(self):
+ instr = u'<a href="http://url.com> url >'
+ exp = '<a href="TAG MALFORMED" target="_blank" rel="nofollow"> url >'
+ format = MarkdownFormat()
+ out = format.to_html(instr)
+ assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
+
+ def test_malformed_link_3(self):
+ instr = u'<a href="http://url.com"> url'
+ exp = '<a href="http://url.com" target="_blank" rel="nofollow"> url'
+ # NB when this is put into Genshi, it will close the tag for you.
+ format = MarkdownFormat()
+ out = format.to_html(instr)
+ assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
+
--- a/ckan/tests/models/test_package.py Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/tests/models/test_package.py Wed Jun 08 15:16:42 2011 +0100
@@ -87,7 +87,7 @@
assert out['metadata_modified'] == pkg.metadata_modified.isoformat()
assert out['metadata_created'] == pkg.metadata_created.isoformat()
assert_equal(out['notes'], pkg.notes)
- assert_equal(out['notes_rendered'], '<p>A <b>great</b> package [HTML_REMOVED] like <a href="/package/pollution_stats" target="_blank" rel="nofollow">package:pollution_stats</a>\n</p>')
+ assert_equal(out['notes_rendered'], '<p>A <b>great</b> package [HTML_REMOVED] like <a href="/package/pollution_stats">package:pollution_stats</a>\n</p>')
class TestPackageWithTags:
Repository URL: https://bitbucket.org/okfn/ckan/
--
This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for this
repository and is addressed to the recipient of this email.
More information about the ckan-changes mailing list