Skip to content

Commit e613e6a

Browse files
bpo-30708: Check for null characters in PyUnicode_AsWideCharString(). (#2285)
Raise a ValueError if the second argument is NULL and the wchar_t\* string contains null characters.
1 parent 65474b9 commit e613e6a

File tree

6 files changed

+44
-42
lines changed

6 files changed

+44
-42
lines changed

Doc/c-api/unicode.rst

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -934,16 +934,22 @@ wchar_t Support
934934
Convert the Unicode object to a wide character string. The output string
935935
always ends with a null character. If *size* is not *NULL*, write the number
936936
of wide characters (excluding the trailing null termination character) into
937-
*\*size*.
937+
*\*size*. Note that the resulting :c:type:`wchar_t` string might contain
938+
null characters, which would cause the string to be truncated when used with
939+
most C functions. If *size* is *NULL* and the :c:type:`wchar_t*` string
940+
contains null characters, a :exc:`ValueError` is raised.
938941
939942
Returns a buffer allocated by :c:func:`PyMem_Alloc` (use
940-
:c:func:`PyMem_Free` to free it) on success. On error, returns *NULL*,
941-
*\*size* is undefined and raises a :exc:`MemoryError`. Note that the
942-
resulting :c:type:`wchar_t` string might contain null characters, which
943-
would cause the string to be truncated when used with most C functions.
943+
:c:func:`PyMem_Free` to free it) on success. On error, returns *NULL*
944+
and *\*size* is undefined. Raises a :exc:`MemoryError` if memory allocation
945+
fails.
944946
945947
.. versionadded:: 3.2
946948
949+
.. versionchanged:: 3.7
950+
Raises a :exc:`ValueError` if *size* is *NULL* and the :c:type:`wchar_t*`
951+
string contains null characters.
952+
947953
948954
.. _builtincodecs:
949955

Doc/whatsnew/3.7.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,10 @@ Changes in the C API
360360
:c:type:`unsigned long`.
361361
(Contributed by Serhiy Storchaka in :issue:`6532`.)
362362

363+
- :c:func:`PyUnicode_AsWideCharString` now raises a :exc:`ValueError` if the
364+
second argument is *NULL* and the :c:type:`wchar_t*` string contains null
365+
characters. (Contributed by Serhiy Storchaka in :issue:`30708`.)
366+
363367

364368
Removed
365369
=======

Lib/ctypes/test/test_slicing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def test_wchar_ptr(self):
134134
dll.my_wcsdup.restype = POINTER(c_wchar)
135135
dll.my_wcsdup.argtypes = POINTER(c_wchar),
136136
dll.my_free.restype = None
137-
res = dll.my_wcsdup(s)
137+
res = dll.my_wcsdup(s[:-1])
138138
self.assertEqual(res[:len(s)], s)
139139
self.assertEqual(res[:len(s):], s)
140140
self.assertEqual(res[len(s)-1:-1:-1], s[::-1])
@@ -153,7 +153,7 @@ def test_wchar_ptr(self):
153153
dll.my_wcsdup.restype = POINTER(c_long)
154154
else:
155155
self.skipTest('Pointers to c_wchar are not supported')
156-
res = dll.my_wcsdup(s)
156+
res = dll.my_wcsdup(s[:-1])
157157
tmpl = list(range(ord("a"), ord("z")+1))
158158
self.assertEqual(res[:len(s)-1], tmpl)
159159
self.assertEqual(res[:len(s)-1:], tmpl)

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,10 @@ Windows
12451245
C API
12461246
-----
12471247

1248+
- bpo-30708: PyUnicode_AsWideCharString() now raises a ValueError if the
1249+
second argument is NULL and the wchar_t\* string contains null
1250+
characters.
1251+
12481252
- bpo-16500: Deprecate PyOS_AfterFork() and add PyOS_BeforeFork(),
12491253
PyOS_AfterFork_Parent() and PyOS_AfterFork_Child().
12501254

Modules/_io/winconsoleio.c

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -304,18 +304,11 @@ _io__WindowsConsoleIO___init___impl(winconsoleio *self, PyObject *nameobj,
304304
if (!d)
305305
return -1;
306306

307-
Py_ssize_t length;
308-
name = PyUnicode_AsWideCharString(decodedname, &length);
307+
name = PyUnicode_AsWideCharString(decodedname, NULL);
309308
console_type = _PyIO_get_console_type(decodedname);
310309
Py_CLEAR(decodedname);
311310
if (name == NULL)
312311
return -1;
313-
314-
if (wcslen(name) != length) {
315-
PyMem_Free(name);
316-
PyErr_SetString(PyExc_ValueError, "embedded null character");
317-
return -1;
318-
}
319312
}
320313

321314
s = mode;

Objects/unicodeobject.c

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2953,23 +2953,26 @@ PyUnicode_FromFormat(const char *format, ...)
29532953

29542954
#ifdef HAVE_WCHAR_H
29552955

2956-
/* Helper function for PyUnicode_AsWideChar() and PyUnicode_AsWideCharString():
2957-
convert a Unicode object to a wide character string.
2956+
/* Convert a Unicode object to a wide character string.
29582957
29592958
- If w is NULL: return the number of wide characters (including the null
29602959
character) required to convert the unicode object. Ignore size argument.
29612960
29622961
- Otherwise: return the number of wide characters (excluding the null
29632962
character) written into w. Write at most size wide characters (including
29642963
the null character). */
2965-
static Py_ssize_t
2966-
unicode_aswidechar(PyObject *unicode,
2967-
wchar_t *w,
2968-
Py_ssize_t size)
2964+
Py_ssize_t
2965+
PyUnicode_AsWideChar(PyObject *unicode,
2966+
wchar_t *w,
2967+
Py_ssize_t size)
29692968
{
29702969
Py_ssize_t res;
29712970
const wchar_t *wstr;
29722971

2972+
if (unicode == NULL) {
2973+
PyErr_BadInternalCall();
2974+
return -1;
2975+
}
29732976
wstr = PyUnicode_AsUnicodeAndSize(unicode, &res);
29742977
if (wstr == NULL)
29752978
return -1;
@@ -2986,43 +2989,35 @@ unicode_aswidechar(PyObject *unicode,
29862989
return res + 1;
29872990
}
29882991

2989-
Py_ssize_t
2990-
PyUnicode_AsWideChar(PyObject *unicode,
2991-
wchar_t *w,
2992-
Py_ssize_t size)
2993-
{
2994-
if (unicode == NULL) {
2995-
PyErr_BadInternalCall();
2996-
return -1;
2997-
}
2998-
return unicode_aswidechar(unicode, w, size);
2999-
}
3000-
30012992
wchar_t*
30022993
PyUnicode_AsWideCharString(PyObject *unicode,
30032994
Py_ssize_t *size)
30042995
{
3005-
wchar_t* buffer;
2996+
const wchar_t *wstr;
2997+
wchar_t *buffer;
30062998
Py_ssize_t buflen;
30072999

30083000
if (unicode == NULL) {
30093001
PyErr_BadInternalCall();
30103002
return NULL;
30113003
}
30123004

3013-
buflen = unicode_aswidechar(unicode, NULL, 0);
3014-
if (buflen == -1)
3005+
wstr = PyUnicode_AsUnicodeAndSize(unicode, &buflen);
3006+
if (wstr == NULL) {
30153007
return NULL;
3016-
buffer = PyMem_NEW(wchar_t, buflen);
3017-
if (buffer == NULL) {
3018-
PyErr_NoMemory();
3008+
}
3009+
if (size == NULL && wcslen(wstr) != (size_t)buflen) {
3010+
PyErr_SetString(PyExc_ValueError,
3011+
"embedded null character");
30193012
return NULL;
30203013
}
3021-
buflen = unicode_aswidechar(unicode, buffer, buflen);
3022-
if (buflen == -1) {
3023-
PyMem_FREE(buffer);
3014+
3015+
buffer = PyMem_NEW(wchar_t, buflen + 1);
3016+
if (buffer == NULL) {
3017+
PyErr_NoMemory();
30243018
return NULL;
30253019
}
3020+
memcpy(buffer, wstr, (buflen + 1) * sizeof(wchar_t));
30263021
if (size != NULL)
30273022
*size = buflen;
30283023
return buffer;

0 commit comments

Comments
 (0)