Skip to content

FIX Let ColumnTransformer.get_feature_names handle transformers with non-string feature names #18459

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits on Apr 8, 2021
5 changes: 5 additions & 0 deletions doc/whats_new/v1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ Changelog
of each transformer in `output_indices_`. :pr:`18393` by
:user:`Luca Bittarello <lbittarello>`.

- |FIX| :meth:`compose.ColumnTransformer.get_feature_names` supports
non-string feature names returned by any of its transformers.
:pr:`18459` by :user:`Albert Villanova del Moral <albertvillanova>` and
:user:`Alonso Silva Allende <alonsosilvaallende>`.

:mod:`sklearn.datasets`
.......................

Expand Down
2 changes: 1 addition & 1 deletion sklearn/compose/_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def get_feature_names(self):
raise AttributeError("Transformer %s (type %s) does not "
"provide get_feature_names."
% (str(name), type(trans).__name__))
feature_names.extend([name + "__" + f for f in
feature_names.extend([f"{name}__{f}" for f in
trans.get_feature_names()])
return feature_names

Expand Down
28 changes: 18 additions & 10 deletions sklearn/compose/tests/test_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,31 +748,38 @@ def test_column_transformer_cloning():
assert hasattr(ct.transformers_[0][1], 'mean_')


def test_column_transformer_get_feature_names():
def test_column_transformer_get_feature_names_raises():
X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T
ct = ColumnTransformer([('trans', Trans(), [0, 1])])
# raise correct error when not fitted
with pytest.raises(NotFittedError):
ct.get_feature_names()
# raise correct error when no feature names are available
ct.fit(X_array)
assert_raise_message(AttributeError,
"Transformer trans (type Trans) does not provide "
"get_feature_names", ct.get_feature_names)
msg = r"Transformer trans \(type Trans\) does not provide " \
r"get_feature_names"
with pytest.raises(AttributeError, match=msg):
ct.get_feature_names()

# working example
X = np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}],
[{'c': 5}, {'c': 6}]], dtype=object).T

@pytest.mark.parametrize("X, keys", [
(np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}],
[{'c': 5}, {'c': 6}]], dtype=object).T, ('a', 'b', 'c')),
(np.array([[{1: 1, 2: 2}, {1: 3, 2: 4}],
[{3: 5}, {3: 6}]], dtype=object).T, ('1', '2', '3')),
])
def test_column_transformer_get_feature_names(X, keys):
ct = ColumnTransformer(
[('col' + str(i), DictVectorizer(), i) for i in range(2)])
ct.fit(X)
assert ct.get_feature_names() == ['col0__a', 'col0__b', 'col1__c']
assert ct.get_feature_names() == [f'col0__{key}' for key in keys[:2]] + \
[f'col1__{keys[2]}']

# drop transformer
ct = ColumnTransformer(
[('col0', DictVectorizer(), 0), ('col1', 'drop', 1)])
ct.fit(X)
assert ct.get_feature_names() == ['col0__a', 'col0__b']
assert ct.get_feature_names() == [f'col0__{key}' for key in keys[:2]]

# passthrough transformer
ct = ColumnTransformer([('trans', 'passthrough', [0, 1])])
Expand All @@ -782,7 +789,8 @@ def test_column_transformer_get_feature_names():
ct = ColumnTransformer([('trans', DictVectorizer(), 0)],
remainder='passthrough')
ct.fit(X)
assert ct.get_feature_names() == ['trans__a', 'trans__b', 'x1']
assert ct.get_feature_names() == [f'trans__{key}' for key in keys[:2]] + \
['x1']

ct = ColumnTransformer([('trans', 'passthrough', [1])],
remainder='passthrough')
Expand Down