FIX make sure to accept "minority" as a valid strategy in over-samplers (#964)

Prakhyath07 · glemaitre · web-flow · commit 79107e848629 · 2022-12-28T12:18:26.000+01:00
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
diff --git a/doc/whats_new/v0.10.rst b/doc/whats_new/v0.10.rst
@@ -1,5 +1,20 @@
 .. _changes_0_10:
 
+Version 0.10.1
+==============
+
+**December 28, 2022**
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+- Fix a regression in over-samplers where the string `minority` was rejected
+  as an invalid sampling strategy.
+  :pr:`964` by :user:`Prakhyath Bhandary <Prakhyath07>`.
+
 Version 0.10.0
 ==============
 
diff --git a/imblearn/over_sampling/base.py b/imblearn/over_sampling/base.py
@@ -61,7 +61,7 @@ class BaseOverSampler(BaseSampler):
     _parameter_constraints: dict = {
         "sampling_strategy": [
             Interval(numbers.Real, 0, 1, closed="right"),
-            StrOptions({"auto", "majority", "not minority", "not majority", "all"}),
+            StrOptions({"auto", "minority", "not minority", "not majority", "all"}),
             Mapping,
             callable,
         ],
diff --git a/imblearn/over_sampling/tests/test_random_over_sampler.py b/imblearn/over_sampling/tests/test_random_over_sampler.py
@@ -7,6 +7,7 @@
 
 import numpy as np
 import pytest
+from sklearn.datasets import make_classification
 from sklearn.utils._testing import (
     _convert_container,
     assert_allclose,
@@ -255,3 +256,20 @@ def test_random_over_sampler_shrinkage_error(data, shrinkage, err_msg):
     ros = RandomOverSampler(shrinkage=shrinkage)
     with pytest.raises(ValueError, match=err_msg):
         ros.fit_resample(X, y)
+
+
+@pytest.mark.parametrize(
+    "sampling_strategy", ["auto", "minority", "not minority", "not majority", "all"]
+)
+def test_random_over_sampler_strings(sampling_strategy):
+    """Check that we support all supposed strings as `sampling_strategy` in
+    a sampler inheriting from `BaseOverSampler`."""
+
+    X, y = make_classification(
+        n_samples=100,
+        n_clusters_per_class=1,
+        n_classes=3,
+        weights=[0.1, 0.3, 0.6],
+        random_state=0,
+    )
+    RandomOverSampler(sampling_strategy=sampling_strategy).fit_resample(X, y)
diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py
@@ -7,6 +7,7 @@
 
 import numpy as np
 import pytest
+from sklearn.datasets import make_classification
 from sklearn.utils._testing import assert_array_equal
 
 from imblearn.under_sampling import RandomUnderSampler
@@ -130,3 +131,20 @@ def test_random_under_sampling_nan_inf():
     assert y_res.shape == (6,)
     assert X_res.shape == (6, 2)
     assert np.any(~np.isfinite(X_res))
+
+
+@pytest.mark.parametrize(
+    "sampling_strategy", ["auto", "majority", "not minority", "not majority", "all"]
+)
+def test_random_under_sampler_strings(sampling_strategy):
+    """Check that we support all supposed strings as `sampling_strategy` in
+    a sampler inheriting from `BaseUnderSampler`."""
+
+    X, y = make_classification(
+        n_samples=100,
+        n_clusters_per_class=1,
+        n_classes=3,
+        weights=[0.1, 0.3, 0.6],
+        random_state=0,
+    )
+    RandomUnderSampler(sampling_strategy=sampling_strategy).fit_resample(X, y)
diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py
@@ -4,6 +4,8 @@
 # License: MIT
 
 import numpy as np
+import pytest
+from sklearn.datasets import make_classification
 from sklearn.utils._testing import assert_array_equal
 
 from imblearn.under_sampling import TomekLinks
@@ -68,3 +70,20 @@ def test_tl_fit_resample():
     y_gt = np.array([1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0])
     assert_array_equal(X_resampled, X_gt)
     assert_array_equal(y_resampled, y_gt)
+
+
+@pytest.mark.parametrize(
+    "sampling_strategy", ["auto", "majority", "not minority", "not majority", "all"]
+)
+def test_tomek_links_strings(sampling_strategy):
+    """Check that we support all supposed strings as `sampling_strategy` in
+    a sampler inheriting from `BaseCleaningSampler`."""
+
+    X, y = make_classification(
+        n_samples=100,
+        n_clusters_per_class=1,
+        n_classes=3,
+        weights=[0.1, 0.3, 0.6],
+        random_state=0,
+    )
+    TomekLinks(sampling_strategy=sampling_strategy).fit_resample(X, y)