Skip to content

MAINT Replace stats.mode calls with fixes._mode #938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Dec 2, 2022
Merged
4 changes: 2 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
versionSpec: '3.9'
- bash: |
# Include pytest compatibility with mypy
pip install pytest flake8 mypy==0.782 black==22.3
pip install pytest flake8==5.0.4 mypy==0.782 black==22.3
displayName: Install linters
- bash: |
black --check --diff .
Expand Down Expand Up @@ -146,7 +146,7 @@ jobs:
CONDA_CHANNEL: 'conda-forge'
PYTHON_VERSION: '3.8'
BLAS: 'openblas'
NUMPY_VERSION: '1.19.5' # we cannot get an older version of the dependencies resolution
NUMPY_VERSION: '1.21.0' # we cannot get an older version of the dependencies resolution
SCIPY_VERSION: 'min'
SKLEARN_VERSION: 'min'
MATPLOTLIB_VERSION: 'none'
Expand Down
4 changes: 2 additions & 2 deletions imblearn/_min_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
NUMPY_MIN_VERSION = "1.17.3"
SCIPY_MIN_VERSION = "1.3.2"
PANDAS_MIN_VERSION = "1.0.5"
SKLEARN_MIN_VERSION = "1.1.0"
SKLEARN_MIN_VERSION = "1.1.3"
TENSORFLOW_MIN_VERSION = "2.4.3"
KERAS_MIN_VERSION = "2.4.3"
JOBLIB_MIN_VERSION = "1.0.0"
JOBLIB_MIN_VERSION = "1.1.1"
THREADPOOLCTL_MIN_VERSION = "2.0.0"
PYTEST_MIN_VERSION = "5.0.1"

Expand Down
4 changes: 2 additions & 2 deletions imblearn/over_sampling/_smote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

import numpy as np
from scipy import sparse
from scipy import stats

from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.utils import check_random_state
Expand All @@ -29,6 +28,7 @@
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring
from ...utils._validation import _deprecate_positional_args
from ...utils.fixes import _mode


class BaseSMOTE(BaseOverSampler):
Expand Down Expand Up @@ -786,7 +786,7 @@ def _make_samples(self, X_class, klass, y_dtype, nn_indices, n_samples):
# where for each feature individually, each category generated is the
# most common category
X_new = np.squeeze(
stats.mode(X_class[nn_indices[samples_indices]], axis=1).mode, axis=1
_mode(X_class[nn_indices[samples_indices]], axis=1).mode, axis=1
)
y_new = np.full(n_samples, fill_value=klass, dtype=y_dtype)
return X_new, y_new
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from collections import Counter

import numpy as np
from scipy.stats import mode

from sklearn.utils import _safe_indexing

Expand All @@ -18,6 +17,8 @@
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._validation import _deprecate_positional_args
from ...utils.fixes import _mode


SEL_KIND = ("all", "mode")

Expand Down Expand Up @@ -155,7 +156,7 @@ def _fit_resample(self, X, y):
nnhood_idx = self.nn_.kneighbors(X_class, return_distance=False)[:, 1:]
nnhood_label = y[nnhood_idx]
if self.kind_sel == "mode":
nnhood_label, _ = mode(nnhood_label, axis=1)
nnhood_label, _ = _mode(nnhood_label, axis=1)
nnhood_bool = np.ravel(nnhood_label) == y_class
elif self.kind_sel == "all":
nnhood_label = nnhood_label == target_class
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from collections import Counter

import numpy as np
from scipy.stats import mode

from sklearn.utils import _safe_indexing

Expand All @@ -17,6 +16,8 @@
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._validation import _deprecate_positional_args
from ...utils.fixes import _mode


SEL_KIND = ("all", "mode")

Expand Down Expand Up @@ -182,7 +183,7 @@ def _fit_resample(self, X, y):
nnhood_idx = self.nn_.kneighbors(X_class, return_distance=False)[:, 1:]
nnhood_label = y[nnhood_idx]
if self.kind_sel == "mode":
nnhood_label_majority, _ = mode(nnhood_label, axis=1)
nnhood_label_majority, _ = _mode(nnhood_label, axis=1)
nnhood_bool = np.ravel(nnhood_label_majority) == y_class
elif self.kind_sel == "all":
nnhood_label_majority = nnhood_label == class_minority
Expand Down
26 changes: 26 additions & 0 deletions imblearn/utils/fixes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Compatibility fixes for older version of python, numpy and scipy
If you add content to this file, please give the version of the package
at which the fix is no longer needed.

Backported from scikit-learn.
"""
# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
# Gael Varoquaux <gael.varoquaux@normalesup.org>
# Fabian Pedregosa <fpedregosa@acm.org>
# Lars Buitinck
#
# License: BSD 3 clause

from sklearn.externals._packaging.version import parse as parse_version
import scipy
import scipy.stats


sp_version = parse_version(scipy.__version__)


# TODO: Remove when SciPy 1.9 is the minimum supported version
def _mode(a, axis=0):
    """Version-aware wrapper around ``scipy.stats.mode``.

    Parameters
    ----------
    a : array-like
        Values forwarded unchanged to ``scipy.stats.mode``.
    axis : int, default=0
        Axis forwarded unchanged to ``scipy.stats.mode``.

    Returns
    -------
    result
        Whatever ``scipy.stats.mode`` returns for the installed SciPy.

    Notes
    -----
    ``keepdims=True`` is only passed when the installed SciPy is at least
    1.9.0; older releases are called without that keyword.
    NOTE(review): presumably this keeps the reduced axis in the result on
    every supported SciPy version -- confirm against the SciPy changelog.
    """
    # Older SciPy: call without the ``keepdims`` keyword.
    if sp_version < parse_version("1.9.0"):
        return scipy.stats.mode(a, axis=axis)
    # SciPy >= 1.9.0: request ``keepdims=True`` explicitly.
    return scipy.stats.mode(a, axis=axis, keepdims=True)
Copy link
Member Author

@hayesall hayesall Dec 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found the earliest sklearn.utils.fixes._mode in scikit-learn==1.1.2:

pip install scikit-learn==1.1.2
python -c 'import sklearn; print(sklearn.utils.fixes._mode)'

We could add a check for scikit-learn>=1.1.2 as follows, but I think the current fix is simpler and does essentially the same thing:

from sklearn.externals._packaging.version import parse as parse_version
import scipy
import scipy.stats
import sklearn

sk_version = parse_version(sklearn.__version__)
sp_version = parse_version(scipy.__version__)


def _mode(a, axis=0):
    """Dispatch the mode computation based on installed library versions.

    The checks run in order and the first match wins:

    1. SciPy >= 1.9.0: ``scipy.stats.mode`` with ``keepdims=True``.
    2. scikit-learn >= 1.1.2: ``sklearn.utils.fixes._mode``.
    3. Otherwise: plain ``scipy.stats.mode``.

    ``a`` and ``axis`` are forwarded unchanged to the selected function.
    """
    # TODO: Remove when SciPy 1.9 is the minimum supported version
    if sp_version >= parse_version("1.9.0"):
        return scipy.stats.mode(a, axis=axis, keepdims=True)

    # TODO: Remove when scikit-learn 1.2 is the minimum supported version
    if sk_version >= parse_version("1.1.2"):
        return sklearn.utils.fixes._mode(a, axis=axis)

    return scipy.stats.mode(a, axis=axis)