Uses library for html cleanup

2022-07-04 13:14:22 -07:00 · 2022-07-04 13:14:22 -07:00 · 7f78140015
commit 7f78140015
parent 58b23a74da
6 changed files with 44 additions and 102 deletions
--- a/bookwyrm/tests/test_sanitize_html.py
+++ b/bookwyrm/tests/test_sanitize_html.py
@@ -1,7 +1,7 @@
 """ make sure only valid html gets to the app """
 from django.test import TestCase

-from bookwyrm.sanitize_html import InputHtmlParser
+from bookwyrm.utils.sanitizer import clean


 class Sanitizer(TestCase):
@@ -10,53 +10,45 @@ class Sanitizer(TestCase):
    def test_no_html(self):
        """just text"""
        input_text = "no      html  "
-        parser = InputHtmlParser()
-        parser.feed(input_text)
-        output = parser.get_output()
+        output = clean(input_text)
        self.assertEqual(input_text, output)

    def test_valid_html(self):
        """leave the html untouched"""
        input_text = "<b>yes    </b> <i>html</i>"
-        parser = InputHtmlParser()
-        parser.feed(input_text)
-        output = parser.get_output()
+        output = clean(input_text)
        self.assertEqual(input_text, output)

    def test_valid_html_attrs(self):
        """and don't remove useful attributes"""
        input_text = '<a href="fish.com">yes    </a> <i>html</i>'
-        parser = InputHtmlParser()
-        parser.feed(input_text)
-        output = parser.get_output()
+        output = clean(input_text)
        self.assertEqual(input_text, output)

    def test_valid_html_invalid_attrs(self):
        """do remove un-approved attributes"""
        input_text = '<a href="fish.com" fish="hello">yes    </a> <i>html</i>'
-        parser = InputHtmlParser()
-        parser.feed(input_text)
-        output = parser.get_output()
+        output = clean(input_text)
        self.assertEqual(output, '<a href="fish.com">yes    </a> <i>html</i>')

    def test_invalid_html(self):
        """remove all html when the html is malformed"""
        input_text = "<b>yes  <i>html</i>"
-        parser = InputHtmlParser()
-        parser.feed(input_text)
-        output = parser.get_output()
+        output = clean(input_text)
        self.assertEqual("yes  html", output)

        input_text = "yes <i></b>html   </i>"
-        parser = InputHtmlParser()
-        parser.feed(input_text)
-        output = parser.get_output()
+        output = clean(input_text)
        self.assertEqual("yes html   ", output)

    def test_disallowed_html(self):
        """remove disallowed html but keep allowed html"""
        input_text = "<div>  yes <i>html</i></div>"
-        parser = InputHtmlParser()
-        parser.feed(input_text)
-        output = parser.get_output()
+        output = clean(input_text)
        self.assertEqual("  yes <i>html</i>", output)
+
+    def test_escaped_bracket(self):
+        """remove &gt; and &lt;"""
+        input_text = "&lt;dev&gt;hi&lt;/div&gt;"
+        output = clean(input_text)
+        self.assertEqual("hi", output)