[ckan-changes] commit/ckan: dread: [misc, tests]: #1180 Improve filtering for links in markdown.

Bitbucket commits-noreply at bitbucket.org
Wed Jun 8 14:17:24 UTC 2011


1 new changeset in ckan:

http://bitbucket.org/okfn/ckan/changeset/0fbfb5e368c0/
changeset:   0fbfb5e368c0
branch:      release-v1.4.1
user:        dread
date:        2011-06-08 16:16:42
summary:     [misc,tests]: #1180 Improve filtering for links in markdown.
affected #:  5 files (3.3 KB)

--- a/ckan/misc.py	Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/misc.py	Wed Jun 08 15:16:42 2011 +0100
@@ -16,18 +16,27 @@
     internal_link = re.compile('(package|tag|group):([a-z0-9\-_]+)')
     normal_link = re.compile('<(http:[^>]+)>')
 
-    html_whitelist = 'a b center li ol p table td tr ul'.split(' ')
+    html_whitelist = 'b center li ol p table td tr ul'.split(' ')
     whitelist_elem = re.compile(r'<(\/?(%s)[^>]*)>' % "|".join(html_whitelist), re.IGNORECASE)
     whitelist_escp = re.compile(r'\\xfc\\xfd(\/?(%s)[^>]*?)\\xfd\\xfc' % "|".join(html_whitelist), re.IGNORECASE)
-    html_link = re.compile(r'<a href="([^"]*)">')
+    normal_link = re.compile(r'<a[^>]*?href="([^"]*?)"[^>]*?>', re.IGNORECASE)
+    abbrev_link = re.compile(r'<(http://[^>]*)>', re.IGNORECASE)
+    any_link = re.compile(r'<a[^>]*?>', re.IGNORECASE)
+    close_link = re.compile(r'<(\/a[^>]*)>', re.IGNORECASE)
+    link_escp = re.compile(r'\\xfc\\xfd(\/?(%s)[^>]*?)\\xfd\\xfc' % "|".join(['a']), re.IGNORECASE)
     
     def to_html(self, text):
         if text is None:
             return ''
-
         # Encode whitelist elements.
         text = self.whitelist_elem.sub(r'\\\\xfc\\\\xfd\1\\\\xfd\\\\xfc', text)
 
+        # Encode links only in an acceptable format (guard against spammers)
+        text = self.normal_link.sub(r'\\\\xfc\\\\xfda href="\1" target="_blank" rel="nofollow"\\\\xfd\\\\xfc', text)
+        text = self.abbrev_link.sub(r'\\\\xfc\\\\xfda href="\1" target="_blank" rel="nofollow"\\\\xfd\\\\xfc\1</a>', text)
+        text = self.any_link.sub(r'\\\\xfc\\\\xfda href="TAG MALFORMED" target="_blank" rel="nofollow"\\\\xfd\\\\xfc', text)
+        text = self.close_link.sub(r'\\\\xfc\\\\xfd\1\\\\xfd\\\\xfc', text)
+
         # Convert internal links.
         text = self.internal_link.sub(r'[\1:\2] (/\1/\2)', text)
 
@@ -42,8 +51,6 @@
 
         # Decode whitelist elements.
         text = self.whitelist_escp.sub(r'<\1>', text)
-
-        # Make links safer.
-        text = self.html_link.sub(r'<a href="\1" target="_blank" rel="nofollow">', text)
+        text = self.link_escp.sub(r'<\1>', text)
 
         return text


--- a/ckan/tests/functional/test_package.py	Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/tests/functional/test_package.py	Wed Jun 08 15:16:42 2011 +0100
@@ -1383,7 +1383,7 @@
         self.body = str(self.res)
         self.assert_fragment('<table width="100%" border="1">')
         self.assert_fragment('<td rowspan="2"><b>Description</b></td>')
-        self.assert_fragment('<a href="http://www.nber.org/patents/subcategories.txt">subcategory.txt</a>')
+        self.assert_fragment('<a href="http://www.nber.org/patents/subcategories.txt" target="_blank" rel="nofollow">subcategory.txt</a>')
         self.assert_fragment('<td colspan="2"><center>--</center></td>')
         self.fail_if_fragment('<script>')
 


--- a/ckan/tests/functional/test_user.py	Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/tests/functional/test_user.py	Wed Jun 08 15:16:42 2011 +0100
@@ -8,9 +8,6 @@
 class TestUserController(FunctionalTestCase, HtmlCheckMethods):
     @classmethod
     def setup_class(self):
-        model.repo.init_db()
-        model.repo.rebuild_db()
-        model.repo.init_db()
         CreateTestData.create()
 
         # make 3 changes, authored by annafan
@@ -21,6 +18,10 @@
             rev.author = u'annafan'
             model.repo.commit_and_remove()
 
+        CreateTestData.create_user('unfinisher', about='<a href="http://unfinished.tag')
+        CreateTestData.create_user('uncloser', about='<a href="http://unclosed.tag">')
+        CreateTestData.create_user('spammer', about=u'<a href="http://mysite">mysite</a><a href=\u201dhttp://test2\u201d>test2</a>')
+        
     @classmethod
     def teardown_class(self):
         model.repo.rebuild_db()
@@ -65,6 +66,39 @@
         assert 'My Account' in main_res, main_res
         assert 'Edit' in main_res, main_res
 
+    def test_user_read_about_unfinished(self):
+        user = model.User.by_name(u'unfinisher')
+        offset = '/user/%s' % user.id
+        res = self.app.get(offset, status=200)
+        main_res = self.main_div(res)
+        assert 'unfinisher' in res, res
+        assert '<a href="http://unfinished.tag' in main_res, main_res
+
+    def test_user_read_about_unclosed(self):
+        user = model.User.by_name(u'uncloser')
+        offset = '/user/%s' % user.id
+        res = self.app.get(offset, status=200)
+        main_res = self.main_div(res)
+        assert 'unclosed' in res, res
+        # tag gets closed by genshi
+        assert '<a href="http://unclosed.tag" target="_blank" rel="nofollow">\n</a>' in main_res, main_res
+
+    def test_user_read_about_spam(self):
+        user = model.User.by_name(u'spammer')
+        offset = '/user/%s' % user.id
+        res = self.app.get(offset, status=200)
+        main_res = self.main_div(res)
+        assert 'spammer' in res, res
+        self.check_named_element(res, 'a',
+                                 'href="http://mysite"',
+                                 'target="_blank"',
+                                 'rel="nofollow"')
+
+        self.check_named_element(res, 'a',
+                                 'href="TAG MALFORMED"',
+                                 'target="_blank"',
+                                 'rel="nofollow"')
+        
     def test_user_login(self):
         offset = url_for(controller='user', action='login', id=None)
         res = self.app.get(offset, status=200)


--- a/ckan/tests/misc/test_format_text.py	Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/tests/misc/test_format_text.py	Wed Jun 08 15:16:42 2011 +0100
@@ -34,15 +34,37 @@
         
     def test_internal_link(self):
         instr = 'package:test-_pkg'
-        exp = '<a href="/package/test-_pkg" target="_blank" rel="nofollow">package:test-_pkg</a>'
+        exp = '<a href="/package/test-_pkg">package:test-_pkg</a>'
         format = MarkdownFormat()
         out = format.to_html(instr)
         assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
 
     def test_normal_link(self):
-        instr = '<http:/somelink/>'
-        exp = '<a href="http:/somelink/" target="_blank" rel="nofollow">http:/somelink/</a>'
+        instr = '<http://somelink/>'
+        exp = '<a href="http://somelink/" target="_blank" rel="nofollow">http://somelink/</a>'
         format = MarkdownFormat()
         out = format.to_html(instr)
         assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
 
+    def test_malformed_link_1(self):
+        instr = u'<a href=\u201dsomelink\u201d>somelink</a>'
+        exp = '<a href="TAG MALFORMED" target="_blank" rel="nofollow">somelink</a>'
+        format = MarkdownFormat()
+        out = format.to_html(instr)
+        assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
+
+    def test_malformed_link_2(self):
+        instr = u'<a href="http://url.com> url >'
+        exp = '<a href="TAG MALFORMED" target="_blank" rel="nofollow"> url >'
+        format = MarkdownFormat()
+        out = format.to_html(instr)
+        assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
+
+    def test_malformed_link_3(self):
+        instr = u'<a href="http://url.com"> url'
+        exp = '<a href="http://url.com" target="_blank" rel="nofollow"> url'
+        # NB when this is put into Genshi, it will close the tag for you.
+        format = MarkdownFormat()
+        out = format.to_html(instr)
+        assert exp in out, '\nGot: %s\nWanted: %s' % (out, exp)
+


--- a/ckan/tests/models/test_package.py	Wed Jun 08 13:44:42 2011 +0100
+++ b/ckan/tests/models/test_package.py	Wed Jun 08 15:16:42 2011 +0100
@@ -87,7 +87,7 @@
         assert out['metadata_modified'] == pkg.metadata_modified.isoformat()
         assert out['metadata_created'] == pkg.metadata_created.isoformat()
         assert_equal(out['notes'], pkg.notes)
-        assert_equal(out['notes_rendered'], '<p>A <b>great</b> package [HTML_REMOVED] like <a href="/package/pollution_stats" target="_blank" rel="nofollow">package:pollution_stats</a>\n</p>')
+        assert_equal(out['notes_rendered'], '<p>A <b>great</b> package [HTML_REMOVED] like <a href="/package/pollution_stats">package:pollution_stats</a>\n</p>')
 
 
 class TestPackageWithTags:

Repository URL: https://bitbucket.org/okfn/ckan/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for this
repository and is addressed to the recipient of this email.




More information about the ckan-changes mailing list