Skip to content

Commit 07bbab1

Browse files
author
Marc DM
committed
good good, all tests pass except the one that needs to go
1 parent de31243 commit 07bbab1

File tree

4 files changed

+86
-26
lines changed

4 files changed

+86
-26
lines changed

html5lib/serializer/htmlserializer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from __future__ import absolute_import, division, unicode_literals
2-
from six import string_types
2+
from six import text_type
33

44
import gettext
55
_ = gettext.gettext
@@ -154,14 +154,14 @@ def __init__(self, **kwargs):
154154
self.strict = False
155155

156156
def encode(self, string):
157-
assert(isinstance(string, string_types))
157+
assert(isinstance(string, text_type))
158158
if self.encoding:
159159
return string.encode(self.encoding, unicode_encode_errors)
160160
else:
161161
return string
162162

163163
def encodeStrict(self, string):
164-
assert(isinstance(string, string_types))
164+
assert(isinstance(string, text_type))
165165
if self.encoding:
166166
return string.encode(self.encoding, "strict")
167167
else:

html5lib/tests/test_six_encoding.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11

2-
from html5lib import html5parser, treewalkers, serializer
3-
from nose.tools import eq_
2+
from html5lib import html5parser, treewalkers, treebuilders, serializer
43

54

6-
def test_treewalker6():
5+
def test_treewalker_six_mix():
76
"""Str/Unicode mix. If str attrs added to tree"""
87

98
text = '<a href="http://example.com">Example</a>'
10-
end_text = '<a href="http://example.com" class="test123">Example</a>'
11-
parser = html5parser.HTMLParser()
12-
walker = treewalkers.getTreeWalker('etree')
9+
end_texts = ('<a href="http://example.com" class="test123">Example</a>',
10+
'<a class="test123" href="http://example.com">Example</a>')
11+
parser = html5parser.HTMLParser(tree=treebuilders.getTreeBuilder('dom'))
12+
walker = treewalkers.getTreeWalker('dom')
1313
serializr = serializer.HTMLSerializer(quote_attr_values=True)
1414
domtree = parser.parseFragment(text)
1515

1616
# at this point domtree should be a DOCUMENT_FRAGMENT
1717
domtree[0].set('class', 'test123')
18-
eq_(end_text, serializr.render(walker(domtree)))
18+
out = serializr.render(walker(domtree))
19+
if not out in end_texts:
20+
raise AssertionError('%r not in %r' % (out, end_texts))

html5lib/tests/test_treewalkers.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,3 +310,41 @@ def test_treewalker():
310310
"document")]
311311
errors = errors.split("\n")
312312
yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
313+
314+
315+
def set_attribute_on_first_child(docfrag, name, value, treeName):
316+
"""naively sets an attribute on the first child of the document
317+
fragment passed in"""
318+
setter = {'ElementTree': lambda d: d[0].set,
319+
'DOM': lambda d: d.firstChild.setAttribute}
320+
setter['PullDOM'] = setter['DOM']
321+
try:
322+
setter.get(treeName, setter['ElementTree'])(docfrag)(name, value)
323+
except TypeError:
324+
setter['DOM'](docfrag)(name, value)
325+
326+
327+
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
328+
"""tests what happens when we add attributes to the intext"""
329+
treeName, treeClass = tree
330+
parser = html5parser.HTMLParser(tree=treeClass["builder"])
331+
document = parser.parseFragment(intext)
332+
for nom, val in attrs_to_add:
333+
set_attribute_on_first_child(document, nom, val, treeName)
334+
335+
document = treeClass.get("adapter", lambda x: x)(document)
336+
output = convertTokens(treeClass["walker"](document))
337+
output = attrlist.sub(sortattrs, output)
338+
if not output in expected:
339+
raise AssertionError('%r not in %r' % (output, expected))
340+
341+
342+
def test_treewalker_six_mix():
343+
"""Str/Unicode mix. If str attrs added to tree"""
344+
345+
intext = '<a href="http://example.com">Example</a>'
346+
expected = '<a>\n class="test123"\n href="http://example.com"\n "Example"'
347+
attrs = [('class', 'test123')]
348+
349+
for tree in treeTypes.items():
350+
yield runTreewalkerEditTest, intext, expected, attrs, tree

html5lib/treewalkers/_base.py

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,24 @@
88
spaceCharacters = "".join(spaceCharacters)
99

1010

11+
def to_text(s, blank_if_none=True):
12+
"""Wrapper around six.text_type to convert None to empty string"""
13+
if s is None:
14+
if blank_if_none:
15+
return ""
16+
else:
17+
return None
18+
elif isinstance(s, text_type):
19+
return s
20+
else:
21+
return text_type(s)
22+
23+
24+
def is_text_or_none(string):
25+
"""Wrapper around isinstance(string_types) or is None"""
26+
return string is None or isinstance(string, string_types)
27+
28+
1129
class TreeWalker(object):
1230
def __init__(self, tree):
1331
self.tree = tree
@@ -26,8 +44,8 @@ def emptyTag(self, namespace, name, attrs, hasChildren=False):
2644
isinstance(value, string_types)
2745
for (namespace, name), value in attrs.items())
2846

29-
yield {"type": "EmptyTag", "name": text_type(name),
30-
"namespace": text_type(namespace),
47+
yield {"type": "EmptyTag", "name": to_text(name, False),
48+
"namespace": to_text(namespace),
3149
"data": attrs}
3250
if hasChildren:
3351
yield self.error(_("Void element has children"))
@@ -42,22 +60,24 @@ def startTag(self, namespace, name, attrs):
4260

4361
return {"type": "StartTag",
4462
"name": text_type(name),
45-
"namespace": text_type(namespace),
46-
"data": attrs}
63+
"namespace": to_text(namespace),
64+
"data": dict(((to_text(ns), to_text(na, False)),
65+
to_text(va, False))
66+
for (ns, na), va in attrs.items())}
4767

4868
def endTag(self, namespace, name):
4969
assert namespace is None or isinstance(namespace, string_types), type(namespace)
5070
assert isinstance(name, string_types), type(namespace)
5171

5272
return {"type": "EndTag",
53-
"name": text_type(name),
54-
"namespace": text_type(namespace),
73+
"name": to_text(name, False),
74+
"namespace": to_text(namespace),
5575
"data": {}}
5676

5777
def text(self, data):
5878
assert isinstance(data, string_types), type(data)
5979

60-
data = data
80+
data = to_text(data)
6181
middle = data.lstrip(spaceCharacters)
6282
left = data[:len(data) - len(middle)]
6383
if left:
@@ -73,23 +93,23 @@ def text(self, data):
7393
def comment(self, data):
7494
assert isinstance(data, string_types), type(data)
7595

76-
return {"type": "Comment", "data": data}
96+
return {"type": "Comment", "data": text_type(data)}
7797

7898
def doctype(self, name, publicId=None, systemId=None, correct=True):
79-
assert name is None or isinstance(name, string_types), type(name)
80-
assert publicId is None or isinstance(publicId, string_types), type(publicId)
81-
assert systemId is None or isinstance(systemId, string_types), type(systemId)
99+
assert is_text_or_none(name), type(name)
100+
assert is_text_or_none(publicId), type(publicId)
101+
assert is_text_or_none(systemId), type(systemId)
82102

83103
return {"type": "Doctype",
84-
"name": name if name is not None else "",
85-
"publicId": publicId,
86-
"systemId": systemId,
87-
"correct": correct}
104+
"name": to_text(name),
105+
"publicId": to_text(publicId),
106+
"systemId": to_text(systemId),
107+
"correct": to_text(correct)}
88108

89109
def entity(self, name):
90110
assert isinstance(name, string_types), type(name)
91111

92-
return {"type": "Entity", "name": name}
112+
return {"type": "Entity", "name": text_type(name)}
93113

94114
def unknown(self, nodeType):
95115
return self.error(_("Unknown node type: ") + nodeType)

0 commit comments

Comments
 (0)