Skip to content

Commit 2779327

Browse files
authored
MAINT make imbalanced-learn compatible with scikit-learn 1.1+ (#946)
1 parent 291af2e commit 2779327

34 files changed

+618
-169
lines changed

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
.. |PythonMinVersion| replace:: 3.8
3131
.. |NumPyMinVersion| replace:: 1.17.3
3232
.. |SciPyMinVersion| replace:: 1.3.2
33-
.. |ScikitLearnMinVersion| replace:: 1.1.0
33+
.. |ScikitLearnMinVersion| replace:: 1.1.3
3434
.. |MatplotlibMinVersion| replace:: 3.1.2
3535
.. |PandasMinVersion| replace:: 1.0.5
3636
.. |TensorflowMinVersion| replace:: 2.4.3

azure-pipelines.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ jobs:
5151
black --check --diff .
5252
displayName: Run black
5353
- bash: |
54-
./build_tools/circle/linting.sh
54+
./build_tools/azure/linting.sh
5555
displayName: Run linting
5656
- bash: |
5757
mypy imblearn/
@@ -112,7 +112,7 @@ jobs:
112112
ne(variables['Build.Reason'], 'Schedule')
113113
)
114114
matrix:
115-
py37_conda_forge_openblas_ubuntu_1804:
115+
py38_conda_forge_openblas_ubuntu_1804:
116116
DISTRIB: 'conda'
117117
CONDA_CHANNEL: 'conda-forge'
118118
PYTHON_VERSION: '3.8'
@@ -141,12 +141,12 @@ jobs:
141141
THREADPOOLCTL_VERSION: 'min'
142142
COVERAGE: 'false'
143143
# Linux + Python 3.8 build with OpenBLAS and without SITE_JOBLIB
144-
py37_conda_defaults_openblas:
144+
py38_conda_defaults_openblas:
145145
DISTRIB: 'conda'
146146
CONDA_CHANNEL: 'conda-forge'
147147
PYTHON_VERSION: '3.8'
148148
BLAS: 'openblas'
149-
NUMPY_VERSION: '1.19.5' # we cannot get an older version of the dependencies resolution
149+
NUMPY_VERSION: '1.21.0' # we cannot get an older version of the dependencies resolution
150150
SCIPY_VERSION: 'min'
151151
SKLEARN_VERSION: 'min'
152152
MATPLOTLIB_VERSION: 'none'

build_tools/azure/install.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
6767
make_conda "python=$PYTHON_VERSION"
6868
python -m pip install -U pip
6969

70-
python -m pip install scikit-learn pandas matplotlib
70+
python -m pip install pandas matplotlib
71+
python -m pip install --pre scikit-learn
7172

7273
elif [[ "$DISTRIB" == "conda-pip-latest-tensorflow" ]]; then
7374
make_conda "python=$PYTHON_VERSION"

build_tools/azure/linting.sh

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/bin/bash
2+
3+
set -e
4+
# pipefail is necessary to propagate exit codes
5+
set -o pipefail
6+
7+
flake8 --show-source .
8+
echo -e "No problem detected by flake8\n"
9+
10+
# For docstrings and warnings of deprecated attributes to be rendered
11+
# properly, the property decorator must come before the deprecated decorator
12+
# (else they are treated as functions)
13+
14+
# do not error when grep -B1 "@deprecated" finds nothing (grep exits non-zero on no match)
15+
set +e
16+
bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated"`
17+
18+
if [ ! -z "$bad_deprecation_property_order" ]
19+
then
20+
echo "property decorator should come before deprecated decorator"
21+
echo "found the following occurrencies:"
22+
echo $bad_deprecation_property_order
23+
exit 1
24+
fi
25+
26+
# Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE
27+
28+
doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")"
29+
30+
if [ ! -z "$doctest_directive" ]
31+
then
32+
echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:"
33+
echo "$doctest_directive"
34+
exit 1
35+
fi
36+
37+
joblib_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/fixes.py")"
38+
39+
if [ ! -z "$joblib_import" ]; then
40+
echo "Use from sklearn.utils.fixes import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
41+
echo "$joblib_import"
42+
exit 1
43+
fi

doc/ensemble.rst

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ data set, this classifier will favor the majority classes::
3535
>>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
3636
>>> bc = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
3737
... random_state=0)
38-
>>> bc.fit(X_train, y_train) #doctest: +ELLIPSIS
38+
>>> bc.fit(X_train, y_train)
3939
BaggingClassifier(...)
4040
>>> y_pred = bc.predict(X_test)
41-
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
41+
>>> balanced_accuracy_score(y_test, y_pred)
4242
0.77...
4343

4444
In :class:`BalancedBaggingClassifier`, each bootstrap sample will be further
@@ -54,10 +54,10 @@ sampling is controlled by the parameter `sampler` or the two parameters
5454
... sampling_strategy='auto',
5555
... replacement=False,
5656
... random_state=0)
57-
>>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
57+
>>> bbc.fit(X_train, y_train)
5858
BalancedBaggingClassifier(...)
5959
>>> y_pred = bbc.predict(X_test)
60-
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
60+
>>> balanced_accuracy_score(y_test, y_pred)
6161
0.8...
6262

6363
Changing the `sampler` will give rise to different known implementation
@@ -78,10 +78,10 @@ each tree of the forest will be provided a balanced bootstrap sample
7878

7979
>>> from imblearn.ensemble import BalancedRandomForestClassifier
8080
>>> brf = BalancedRandomForestClassifier(n_estimators=100, random_state=0)
81-
>>> brf.fit(X_train, y_train) # doctest: +ELLIPSIS
81+
>>> brf.fit(X_train, y_train)
8282
BalancedRandomForestClassifier(...)
8383
>>> y_pred = brf.predict(X_test)
84-
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
84+
>>> balanced_accuracy_score(y_test, y_pred)
8585
0.8...
8686

8787
.. _boosting:
@@ -97,10 +97,10 @@ a boosting iteration :cite:`seiffert2009rusboost`::
9797
>>> from imblearn.ensemble import RUSBoostClassifier
9898
>>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
9999
... random_state=0)
100-
>>> rusboost.fit(X_train, y_train) # doctest: +ELLIPSIS
100+
>>> rusboost.fit(X_train, y_train)
101101
RUSBoostClassifier(...)
102102
>>> y_pred = rusboost.predict(X_test)
103-
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
103+
>>> balanced_accuracy_score(y_test, y_pred)
104104
0...
105105

106106
A specific method which uses :class:`~sklearn.ensemble.AdaBoostClassifier` as
@@ -111,10 +111,10 @@ the :class:`BalancedBaggingClassifier` API, one can construct the ensemble as::
111111

112112
>>> from imblearn.ensemble import EasyEnsembleClassifier
113113
>>> eec = EasyEnsembleClassifier(random_state=0)
114-
>>> eec.fit(X_train, y_train) # doctest: +ELLIPSIS
114+
>>> eec.fit(X_train, y_train)
115115
EasyEnsembleClassifier(...)
116116
>>> y_pred = eec.predict(X_test)
117-
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
117+
>>> balanced_accuracy_score(y_test, y_pred)
118118
0.6...
119119

120120
.. topic:: Examples

doc/over_sampling.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ a classifier::
4040

4141
>>> from sklearn.svm import LinearSVC
4242
>>> clf = LinearSVC()
43-
>>> clf.fit(X_resampled, y_resampled) # doctest : +ELLIPSIS
43+
>>> clf.fit(X_resampled, y_resampled)
4444
LinearSVC(...)
4545

4646
In the figure below, we compare the decision functions of a classifier trained

imblearn/_min_dependencies.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
NUMPY_MIN_VERSION = "1.17.3"
55
SCIPY_MIN_VERSION = "1.3.2"
66
PANDAS_MIN_VERSION = "1.0.5"
7-
SKLEARN_MIN_VERSION = "1.1.0"
7+
SKLEARN_MIN_VERSION = "1.1.3"
88
TENSORFLOW_MIN_VERSION = "2.4.3"
99
KERAS_MIN_VERSION = "2.4.3"
10-
JOBLIB_MIN_VERSION = "1.0.0"
10+
JOBLIB_MIN_VERSION = "1.1.1"
1111
THREADPOOLCTL_MIN_VERSION = "2.0.0"
1212
PYTEST_MIN_VERSION = "5.0.1"
1313

imblearn/combine/_smote_enn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ class SMOTEENN(BaseSampler):
9191
9292
>>> from collections import Counter
9393
>>> from sklearn.datasets import make_classification
94-
>>> from imblearn.combine import SMOTEENN # doctest: +NORMALIZE_WHITESPACE
94+
>>> from imblearn.combine import SMOTEENN
9595
>>> X, y = make_classification(n_classes=2, class_sep=2,
9696
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
9797
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)

imblearn/combine/_smote_tomek.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class SMOTETomek(BaseSampler):
9090
>>> from collections import Counter
9191
>>> from sklearn.datasets import make_classification
9292
>>> from imblearn.combine import \
93-
SMOTETomek # doctest: +NORMALIZE_WHITESPACE
93+
SMOTETomek
9494
>>> X, y = make_classification(n_classes=2, class_sep=2,
9595
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
9696
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)

imblearn/ensemble/_bagging.py

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
# Christos Aridas
55
# License: MIT
66

7+
import inspect
78
import numbers
9+
import warnings
810

911
import numpy as np
1012

@@ -41,10 +43,12 @@ class BalancedBaggingClassifier(BaggingClassifier):
4143
4244
Parameters
4345
----------
44-
base_estimator : estimator object, default=None
46+
estimator : estimator object, default=None
4547
The base estimator to fit on random subsets of the dataset.
4648
If None, then the base estimator is a decision tree.
4749
50+
.. versionadded:: 0.10
51+
4852
n_estimators : int, default=10
4953
The number of base estimators in the ensemble.
5054
@@ -100,18 +104,37 @@ class BalancedBaggingClassifier(BaggingClassifier):
100104
101105
.. versionadded:: 0.8
102106
107+
base_estimator : estimator object, default=None
108+
The base estimator to fit on random subsets of the dataset.
109+
If None, then the base estimator is a decision tree.
110+
111+
.. deprecated:: 0.10
112+
`base_estimator` was renamed to `estimator` in version 0.10 and
113+
will be removed in 0.12.
114+
103115
Attributes
104116
----------
117+
estimator_ : estimator
118+
The base estimator from which the ensemble is grown.
119+
120+
.. versionadded:: 0.10
121+
105122
base_estimator_ : estimator
106123
The base estimator from which the ensemble is grown.
107124
125+
.. deprecated:: 1.2
126+
`base_estimator_` is deprecated in `scikit-learn` 1.2 and will be
127+
removed in 1.4. Use `estimator_` instead. When the minimum version
128+
of `scikit-learn` supported by `imbalanced-learn` reaches 1.4,
129+
this attribute will be removed.
130+
108131
n_features_ : int
109132
The number of features when `fit` is performed.
110133
111134
.. deprecated:: 1.0
112135
`n_features_` is deprecated in `scikit-learn` 1.0 and will be removed
113-
in version 1.2. Depending of the version of `scikit-learn` installed,
114-
you will get be warned or not.
136+
in version 1.2. When the minimum version of `scikit-learn` supported
137+
by `imbalanced-learn` reaches 1.2, this attribute will be removed.
115138
116139
estimators_ : list of estimators
117140
The collection of fitted base estimators.
@@ -209,7 +232,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
209232
>>> from sklearn.model_selection import train_test_split
210233
>>> from sklearn.metrics import confusion_matrix
211234
>>> from imblearn.ensemble import \
212-
BalancedBaggingClassifier # doctest: +NORMALIZE_WHITESPACE
235+
BalancedBaggingClassifier
213236
>>> X, y = make_classification(n_classes=2, class_sep=2,
214237
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
215238
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
@@ -218,7 +241,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
218241
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,
219242
... random_state=0)
220243
>>> bbc = BalancedBaggingClassifier(random_state=42)
221-
>>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
244+
>>> bbc.fit(X_train, y_train)
222245
BalancedBaggingClassifier(...)
223246
>>> y_pred = bbc.predict(X_test)
224247
>>> print(confusion_matrix(y_test, y_pred))
@@ -229,7 +252,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
229252
@_deprecate_positional_args
230253
def __init__(
231254
self,
232-
base_estimator=None,
255+
estimator=None,
233256
n_estimators=10,
234257
*,
235258
max_samples=1.0,
@@ -244,10 +267,18 @@ def __init__(
244267
random_state=None,
245268
verbose=0,
246269
sampler=None,
270+
base_estimator="deprecated",
247271
):
272+
# TODO: remove when supporting scikit-learn>=1.2
273+
bagging_classifier_signature = inspect.signature(super().__init__)
274+
estimator_params = {"base_estimator": base_estimator}
275+
if "estimator" in bagging_classifier_signature.parameters:
276+
estimator_params["estimator"] = estimator
277+
else:
278+
self.estimator = estimator
248279

249280
super().__init__(
250-
base_estimator,
281+
**estimator_params,
251282
n_estimators=n_estimators,
252283
max_samples=max_samples,
253284
max_features=max_features,
@@ -294,20 +325,54 @@ def _validate_estimator(self, default=DecisionTreeClassifier()):
294325
f"n_estimators must be greater than zero, " f"got {self.n_estimators}."
295326
)
296327

297-
if self.base_estimator is not None:
328+
if self.estimator is not None and (
329+
self.base_estimator not in [None, "deprecated"]
330+
):
331+
raise ValueError(
332+
"Both `estimator` and `base_estimator` were set. Only set `estimator`."
333+
)
334+
335+
if self.estimator is not None:
336+
base_estimator = clone(self.estimator)
337+
elif self.base_estimator not in [None, "deprecated"]:
338+
warnings.warn(
339+
"`base_estimator` was renamed to `estimator` in version 0.10 and "
340+
"will be removed in 0.12.",
341+
FutureWarning,
342+
)
298343
base_estimator = clone(self.base_estimator)
299344
else:
300345
base_estimator = clone(default)
301346

302347
if self.sampler_._sampling_type != "bypass":
303348
self.sampler_.set_params(sampling_strategy=self._sampling_strategy)
304349

305-
self.base_estimator_ = Pipeline(
306-
[
307-
("sampler", self.sampler_),
308-
("classifier", base_estimator),
309-
]
350+
self._estimator = Pipeline(
351+
[("sampler", self.sampler_), ("classifier", base_estimator)]
352+
)
353+
try:
354+
# scikit-learn < 1.2
355+
self.base_estimator_ = self._estimator
356+
except AttributeError:
357+
pass
358+
359+
# TODO: remove when supporting scikit-learn>=1.4
360+
@property
361+
def estimator_(self):
362+
"""Estimator used to grow the ensemble."""
363+
return self._estimator
364+
365+
# TODO: remove when supporting scikit-learn>=1.2
366+
@property
367+
def n_features_(self):
368+
"""Number of features when ``fit`` is performed."""
369+
warnings.warn(
370+
"`n_features_` was deprecated in scikit-learn 1.0. This attribute will "
371+
"not be accessible when the minimum supported version of scikit-learn "
372+
"is 1.2.",
373+
FutureWarning,
310374
)
375+
return self.n_features_in_
311376

312377
def fit(self, X, y):
313378
"""Build a Bagging ensemble of estimators from the training set (X, y).

0 commit comments

Comments
 (0)