Skip to content

bpo-44688: Remove ASCII limitation from sqlite3 collation names #27395

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Doc/library/sqlite3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,10 @@ Connection Objects

con.create_collation("reverse", None)

.. versionchanged:: 3.11
The collation name can contain any Unicode character. Earlier, only
ASCII characters were allowed.


.. method:: interrupt()

Expand Down
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.11.rst
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ sqlite3
:meth:`~sqlite3.Connection.set_authorizer`.
(Contributed by Erlend E. Aasland in :issue:`44491`.)

* The collation name passed to :meth:`~sqlite3.Connection.create_collation`
can now contain any Unicode character. Collation names that cannot be encoded
as UTF-8 (for example, a lone surrogate) now raise :exc:`UnicodeEncodeError`
instead of :exc:`sqlite3.ProgrammingError`.
(Contributed by Erlend E. Aasland in :issue:`44688`.)


Removed
=======
Expand Down
3 changes: 1 addition & 2 deletions Lib/sqlite3/test/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ def test_create_collation_not_callable(self):

def test_create_collation_not_ascii(self):
con = sqlite.connect(":memory:")
with self.assertRaises(sqlite.ProgrammingError):
con.create_collation("collä", lambda x, y: (x > y) - (x < y))
con.create_collation("collä", lambda x, y: (x > y) - (x < y))

def test_create_collation_bad_upper(self):
class BadUpperStr(str):
Expand Down
2 changes: 1 addition & 1 deletion Lib/sqlite3/test/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def test_connection_call(self):
def test_collation(self):
def collation_cb(a, b):
return 1
self.assertRaises(sqlite.ProgrammingError, self.con.create_collation,
self.assertRaises(UnicodeEncodeError, self.con.create_collation,
# Lone surrogate cannot be encoded to the default encoding (utf8)
"\uDC80", collation_cb)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:meth:`sqlite3.Connection.create_collation` now accepts non-ASCII collation
names. Patch by Erlend E. Aasland.
16 changes: 11 additions & 5 deletions Modules/_sqlite/clinic/connection.c.h
Original file line number Diff line number Diff line change
Expand Up @@ -722,13 +722,14 @@ PyDoc_STRVAR(pysqlite_connection_create_collation__doc__,

static PyObject *
pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
PyObject *name, PyObject *callable);
const char *name,
PyObject *callable);

static PyObject *
pysqlite_connection_create_collation(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *name;
const char *name;
PyObject *callable;

if (!_PyArg_CheckPositional("create_collation", nargs, 2, 2)) {
Expand All @@ -738,10 +739,15 @@ pysqlite_connection_create_collation(pysqlite_Connection *self, PyObject *const
_PyArg_BadArgument("create_collation", "argument 1", "str", args[0]);
goto exit;
}
if (PyUnicode_READY(args[0]) == -1) {
Py_ssize_t name_length;
name = PyUnicode_AsUTF8AndSize(args[0], &name_length);
if (name == NULL) {
goto exit;
}
if (strlen(name) != (size_t)name_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
name = args[0];
callable = args[1];
return_value = pysqlite_connection_create_collation_impl(self, name, callable);

Expand Down Expand Up @@ -811,4 +817,4 @@ pysqlite_connection_exit(pysqlite_Connection *self, PyObject *const *args, Py_ss
#ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
#define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
#endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */
/*[clinic end generated code: output=30f11f2d8f09bdf0 input=a9049054013a1b77]*/
/*[clinic end generated code: output=a7a899c4e41381ac input=a9049054013a1b77]*/
63 changes: 11 additions & 52 deletions Modules/_sqlite/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -1720,7 +1720,7 @@ pysqlite_connection_backup_impl(pysqlite_Connection *self,
/*[clinic input]
_sqlite3.Connection.create_collation as pysqlite_connection_create_collation

name: unicode
name: str
callback as callable: object
/

Expand All @@ -1729,61 +1729,26 @@ Creates a collation function. Non-standard.

static PyObject *
pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
PyObject *name, PyObject *callable)
/*[clinic end generated code: output=0f63b8995565ae22 input=5c3898813a776cf2]*/
const char *name,
PyObject *callable)
/*[clinic end generated code: output=a4ceaff957fdef9a input=301647aab0f2fb1d]*/
{
PyObject* uppercase_name = 0;
Py_ssize_t i, len;
_Py_IDENTIFIER(upper);
const char *uppercase_name_str;
int rc;
unsigned int kind;
const void *data;

if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
goto finally;
}

uppercase_name = _PyObject_CallMethodIdOneArg((PyObject *)&PyUnicode_Type,
&PyId_upper, name);
if (!uppercase_name) {
goto finally;
}

if (PyUnicode_READY(uppercase_name))
goto finally;
len = PyUnicode_GET_LENGTH(uppercase_name);
kind = PyUnicode_KIND(uppercase_name);
data = PyUnicode_DATA(uppercase_name);
for (i=0; i<len; i++) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
if ((ch >= '0' && ch <= '9')
|| (ch >= 'A' && ch <= 'Z')
|| (ch == '_'))
{
continue;
} else {
PyErr_SetString(self->ProgrammingError,
"invalid character in collation name");
goto finally;
}
return NULL;
}

uppercase_name_str = PyUnicode_AsUTF8(uppercase_name);
if (!uppercase_name_str)
goto finally;

int rc;
int flags = SQLITE_UTF8;
if (callable == Py_None) {
rc = sqlite3_create_collation_v2(self->db, uppercase_name_str, flags,
rc = sqlite3_create_collation_v2(self->db, name, flags,
NULL, NULL, NULL);
}
else {
if (!PyCallable_Check(callable)) {
PyErr_SetString(PyExc_TypeError, "parameter must be callable");
goto finally;
return NULL;
}
rc = sqlite3_create_collation_v2(self->db, uppercase_name_str, flags,
rc = sqlite3_create_collation_v2(self->db, name, flags,
Py_NewRef(callable),
&pysqlite_collation_callback,
&_destructor);
Expand All @@ -1798,16 +1763,10 @@ pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
Py_DECREF(callable);
}
_pysqlite_seterror(self->db);
goto finally;
}

finally:
Py_XDECREF(uppercase_name);

if (PyErr_Occurred()) {
return NULL;
}
return Py_NewRef(Py_None);

Py_RETURN_NONE;
}

/*[clinic input]
Expand Down