diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 3c4b38a93b8ee..18ab118c4bf16 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -698,6 +698,7 @@ Deprecations - Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`) - Deprecated passing arguments as positional in :meth:`DataFrame.reset_index` (other than ``"level"``) and :meth:`Series.reset_index` (:issue:`41485`) - Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`,:issue:`33401`) +- Deprecated behavior of :class:`Series` construction with large-integer values and small-integer dtype silently overflowing; use ``Series(data).astype(dtype)`` instead (:issue:`41734`) - Deprecated inference of ``timedelta64[ns]``, ``datetime64[ns]``, or ``DatetimeTZDtype`` dtypes in :class:`Series` construction when data containing strings is passed and no ``dtype`` is passed (:issue:`33558`) - In a future version, constructing :class:`Series` or :class:`DataFrame` with ``datetime64[ns]`` data and ``DatetimeTZDtype`` will treat the data as wall-times instead of as UTC times (matching DatetimeIndex behavior). To treat the data as UTC times, use ``pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(dtype.tz)`` or ``pd.Series(data.view("int64"), dtype=dtype)`` (:issue:`33401`) - Deprecated passing arguments as positional in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis` (other than ``"labels"``) (:issue:`41485`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8a230e5da01dc..5c7211a5d1852 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2036,7 +2036,7 @@ def construct_1d_ndarray_preserving_na( def maybe_cast_to_integer_array( arr: list | np.ndarray, dtype: np.dtype, copy: bool = False -): +) -> np.ndarray: """ Takes any dtype and returns the casted version, raising for when data is incompatible with integer/unsigned integer dtypes. @@ -2107,6 +2107,20 @@ def maybe_cast_to_integer_array( if is_float_dtype(arr.dtype) or is_object_dtype(arr.dtype): raise ValueError("Trying to coerce float values to integers") + if casted.dtype < arr.dtype: + # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows + warnings.warn( + f"Values are too large to be losslessly cast to {dtype}. " + "In a future version this will raise OverflowError. To retain the " + f"old behavior, use pd.Series(values).astype({dtype})", + FutureWarning, + stacklevel=find_stack_level(), + ) + return casted + + # No known cases that get here, but raising explicitly to cover our bases. + raise ValueError(f"values cannot be losslessly cast to {dtype}") + def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar: """ diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 4a7c4faade00d..b617514f383af 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -358,7 +358,7 @@ def test_unstack_preserve_dtypes(self): "E": Series([1.0, 50.0, 100.0]).astype("float32"), "F": Series([3.0, 4.0, 5.0]).astype("float64"), "G": False, - "H": Series([1, 200, 923442], dtype="int8"), + "H": Series([1, 200, 923442]).astype("int8"), } ) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f03322f9b0d6c..9376bd5f025b3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -711,6 +711,21 @@ def test_constructor_cast(self): with pytest.raises(ValueError, match=msg): Series(["a", "b", "c"], dtype=float) + def test_constructor_signed_int_overflow_deprecation(self): + # GH#41734 disallow silent overflow + msg = "Values are too large to be losslessly cast" + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([1, 200, 923442], dtype="int8") + + expected = Series([1, -56, 50], dtype="int8") + tm.assert_series_equal(ser, expected) + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([1, 200, 923442], dtype="uint8") + + expected = Series([1, 200, 50], dtype="uint8") + tm.assert_series_equal(ser, expected) + def test_constructor_unsigned_dtype_overflow(self, uint_dtype): # see gh-15832 msg = "Trying to coerce negative values to unsigned integers"