Skip to content

fix a unicode_literals regression in ihatexml.py #51

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion html5lib/ihatexml.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def fromXmlName(self, name):
return name

def escapeChar(self, char):
    """Return the escape token for ``char`` and remember it in the cache.

    The token is the letter ``U`` followed by the character's code point
    written as upper-case hexadecimal, zero-padded to at least five digits
    (for example ``":"`` becomes ``"U0003A"``).

    :arg char: a single character to escape
    :returns: the replacement string, also stored in
        ``self.replaceCache[char]``
    """
    # A single %-format keeps the whole expression in one string type.
    # NOTE(review): the earlier ``"U" + hex(ord(char))[2:].upper().rjust(5, "0")``
    # form mixed the byte string returned by hex() with a unicode fill
    # character once unicode_literals is in effect on Python 2, which
    # appears to be the regression this change addresses -- confirm.
    replacement = "U%05X" % ord(char)
    self.replaceCache[char] = replacement
    return replacement

Expand Down
19 changes: 18 additions & 1 deletion html5lib/tests/test_parser2.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import absolute_import, division, unicode_literals

import io
import warnings

from . import support # flake8: noqa
from html5lib import html5parser
from html5lib.constants import namespaces
from html5lib.constants import namespaces, DataLossWarning
from html5lib import treebuilders

import unittest
Expand All @@ -16,6 +17,10 @@ class MoreParserTests(unittest.TestCase):

def setUp(self):
    """Prepare the tree builders used by the tests.

    ``self.dom_tree`` is always set to the "dom" tree builder.
    ``self.lxml_tree`` is set to the "lxml" tree builder, or to ``None``
    when requesting it raises ``ImportError``.
    """
    self.dom_tree = treebuilders.getTreeBuilder("dom")

    # Start from "not available" and only overwrite on success.
    self.lxml_tree = None
    try:
        self.lxml_tree = treebuilders.getTreeBuilder("lxml")
    except ImportError:
        # Leave the None default in place; tests check for it.
        pass

def test_assertDoctypeCloneable(self):
parser = html5parser.HTMLParser(tree=self.dom_tree)
Expand All @@ -27,6 +32,18 @@ def test_line_counter(self):
parser = html5parser.HTMLParser(tree=self.dom_tree)
parser.parse("<pre>\nx\n&gt;\n</pre>")

def test_ihatexml(self):
    """Parsing an ``xml:lang`` attribute with the lxml builder warns once.

    The test feeds a byte-string fragment carrying an ``xml:lang``
    attribute to a parser backed by the lxml tree builder and expects
    exactly one recorded warning whose category is a subclass of
    ``DataLossWarning``.  It returns without asserting anything when no
    lxml tree builder is available.
    """
    if not self.lxml_tree:
        # setUp stores None when the lxml tree builder cannot be imported.
        return

    markup = b'<p xml:lang="pl">Witam wszystkich'
    parser = html5parser.HTMLParser(tree=self.lxml_tree)

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including ones that would normally be
        # suppressed by the default filters.
        warnings.simplefilter("always")
        parser.parse(markup)

    self.assertEqual(len(caught), 1)
    self.assertTrue(issubclass(caught[-1].category, DataLossWarning))

def test_namespace_html_elements_0_dom(self):
parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=True)
doc = parser.parse("<html></html>")
Expand Down