Skip to content

gh-56698: Fix base64-url parsing errors in email headers #136830

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions Lib/email/_encoded_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,22 @@ def decode_b(encoded):
# The non-alphabet characters are ignored as far as padding
# goes, but we don't know how many there are. So try without adding
# padding to see if it works.
#
# We use urlsafe_b64decode here because some mailers apparently use the
# urlsafe b64 alphabet, and urlsafe_b64decode will correctly decode
# both the urlsafe and regular alphabets.

try:
return (
base64.b64decode(encoded, validate=False),
base64.urlsafe_b64decode(encoded),
[errors.InvalidBase64CharactersDefect()],
)
except binascii.Error:
# Add as much padding as could possibly be necessary (extra padding
# is ignored).
try:
return (
base64.b64decode(encoded + b'==', validate=False),
base64.urlsafe_b64decode(encoded + b'=='),
[errors.InvalidBase64CharactersDefect(),
errors.InvalidBase64PaddingDefect()],
)
Expand Down
11 changes: 7 additions & 4 deletions Lib/email/base64mime.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
]


from base64 import b64encode
from binascii import b2a_base64, a2b_base64
from base64 import b64encode, urlsafe_b64decode
from binascii import b2a_base64

CRLF = '\r\n'
NL = '\n'
Expand Down Expand Up @@ -102,12 +102,15 @@ def decode(string):
base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the
high-level email.header class for that functionality.
"""
# We use urlsafe_b64decode here because some mailers apparently use the
# urlsafe b64 alphabet, and urlsafe_b64decode will correctly decode both
# the urlsafe and regular alphabets.
if not string:
return bytes()
elif isinstance(string, str):
return a2b_base64(string.encode('raw-unicode-escape'))
return urlsafe_b64decode(string.encode('raw-unicode-escape'))
else:
return a2b_base64(string)
return urlsafe_b64decode(string)


# For convenience and backwards compatibility w/ standard base64 module
Expand Down
8 changes: 8 additions & 0 deletions Lib/test/test_email/test__encoded_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ def test_missing_padding(self):
# 2 missing padding characters
self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect])

def test_urlsafe_alphabet(self):
self._test(
b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==',
b'Anmeldung Netzanschluss S\xfcdring3p.jpg',
[errors.InvalidBase64CharactersDefect])
# mix of different base64 alphabets
self._test(b'aGVsbG8_Pz8/', b'hello????', [errors.InvalidBase64CharactersDefect])

def test_invalid_character(self):
self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect])

Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_email/test_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -4663,6 +4663,11 @@ def test_decode(self):
eq = self.assertEqual
eq(base64mime.decode(''), b'')
eq(base64mime.decode('aGVsbG8='), b'hello')
eq(base64mime.decode(
'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='),
b'Anmeldung Netzanschluss S\xfcdring3p.jpg')
# mix of different base64 alphabets
eq(base64mime.decode('aGVsbG8_Pz8/'), b'hello????')

def test_encode(self):
eq = self.assertEqual
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Accept the URL-safe base64 alphabet when decoding email headers and bodies,
because some email clients produce it instead of the standard alphabet.
Loading