Skip to content

DEPR: Deprecate use of strings denoting units with 'M', 'Y' or 'y' in pd.to_timedelta (36666) #36838

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 32 commits into from
Oct 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
de4fb4d
Throw warning when M,Y,m or y is used in an arg for pd.to_timedelta
avinashpancham Oct 3, 2020
61ec5b6
Update docstring by removing old logic and adding depr notice
avinashpancham Oct 3, 2020
abb613e
Add whatsnew entry
avinashpancham Oct 3, 2020
4ccf43d
Update wording in docs
avinashpancham Oct 3, 2020
271f2fc
Place space at the end of string instead of at the beginning of next …
avinashpancham Oct 3, 2020
7b11a92
Also catch month and year timedelta variations without a space
avinashpancham Oct 4, 2020
b39eadf
Add test for depr behavior
avinashpancham Oct 4, 2020
f56796a
Catch deprecated patterns with a regex to prevent false positives and…
avinashpancham Oct 4, 2020
ad38ad3
Add description to unittest
avinashpancham Oct 12, 2020
bc03065
Also find appearences of unambiguous timedelta values in containers
avinashpancham Oct 13, 2020
d491cce
Update test_unit_parser in test_timedelta.py
avinashpancham Oct 15, 2020
76e6602
Move warnings code to timedeltas.pyx and update tests
avinashpancham Oct 18, 2020
cdc5821
Update code for failing tests
avinashpancham Oct 18, 2020
5c471b3
Merge branch 'master' into GH36666
avinashpancham Oct 18, 2020
02d642b
Remove empty line
avinashpancham Oct 18, 2020
89af5bb
Update depr message
avinashpancham Oct 19, 2020
a5ef0c3
Merge branch 'master' into GH36666
avinashpancham Oct 20, 2020
0bc1a89
Move warning location and update tests
avinashpancham Oct 22, 2020
3682687
Merge remote-tracking branch 'upstream/master' into GH36666
avinashpancham Oct 22, 2020
2abd0ed
Update parse_iso_format_string and tests that give warnings
avinashpancham Oct 22, 2020
6f4e7ca
Merge remote-tracking branch 'upstream/master' into GH36666
avinashpancham Oct 23, 2020
8701f26
Replace ipython directives with code-blocks in whatsnew v0.15.0
avinashpancham Oct 24, 2020
783e039
Merge remote-tracking branch 'upstream/master' into GH36666
avinashpancham Oct 26, 2020
0cc6dff
Update newly added test
avinashpancham Oct 26, 2020
6a02d05
Merge remote-tracking branch 'upstream/master' into GH36666
avinashpancham Oct 27, 2020
71f801e
Update test to prevent depr warning
avinashpancham Oct 28, 2020
e183f9b
Merge remote-tracking branch 'upstream/master' into GH36666
avinashpancham Oct 28, 2020
2d8c9fe
Merge remote-tracking branch 'upstream/master' into GH36666
avinashpancham Oct 30, 2020
1d2871b
Merge remote-tracking branch 'upstream/master' into GH36666
avinashpancham Oct 31, 2020
e01f75d
Revert the depr of lowercase m
avinashpancham Oct 31, 2020
5277943
Revert more depr of m
avinashpancham Oct 31, 2020
3b83aa0
Merge remote-tracking branch 'upstream/master' into GH36666
avinashpancham Oct 31, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ Deprecations
- :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`)
- Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
- Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)

.. ---------------------------------------------------------------------------

Expand Down
14 changes: 13 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import collections
import warnings

import cython

Expand Down Expand Up @@ -466,6 +467,15 @@ cdef inline timedelta_from_spec(object number, object frac, object unit):

try:
unit = ''.join(unit)

if unit in ["M", "Y", "y"]:
warnings.warn(
"Units 'M', 'Y' and 'y' do not represent unambiguous "
"timedelta values and will be removed in a future version",
FutureWarning,
stacklevel=2,
)

if unit == 'M':
# To parse ISO 8601 string, 'M' should be treated as minute,
# not month
Expand Down Expand Up @@ -634,9 +644,11 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1:
else:
neg = 1
elif c in ['W', 'D', 'H', 'M']:
unit.append(c)
if c in ['H', 'M'] and len(number) > 2:
raise ValueError(err_msg)
if c == 'M':
c = 'min'
unit.append(c)
r = timedelta_from_spec(number, '0', unit)
result += timedelta_as_neg(r, neg)

Expand Down
10 changes: 6 additions & 4 deletions pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ def to_timedelta(arg, unit=None, errors="raise"):
Parameters
----------
arg : str, timedelta, list-like or Series
The data to be converted to timedelta. The character M by itself,
e.g. '1M', is treated as minute, not month. The characters Y and y
are treated as the mean length of the Gregorian calendar year -
365.2425 days or 365 days 5 hours 49 minutes 12 seconds.
The data to be converted to timedelta.

.. deprecated:: 1.2
Strings with units 'M', 'Y' and 'y' do not represent
unambiguous timedelta values and will be removed in a future version

unit : str, optional
Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``.

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_properties(self, closed):
[1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
[-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]),
pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]),
],
)
def test_length(self, closed, breaks):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/scalar/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def test_hash(self, interval):
(-np.inf, np.inf, np.inf),
(Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")),
(Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")),
(Timedelta("1H10M"), Timedelta("5H5M"), Timedelta("3H55M")),
(Timedelta("5S"), Timedelta("1H"), Timedelta("59M55S")),
(Timedelta("1H10min"), Timedelta("5H5min"), Timedelta("3H55min")),
(Timedelta("5S"), Timedelta("1H"), Timedelta("59min55S")),
],
)
def test_length(self, left, right, expected):
Expand Down
197 changes: 103 additions & 94 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,108 +160,117 @@ def test_nat_converters(self):
assert result.astype("int64") == iNaT

@pytest.mark.parametrize(
"units, np_unit",
[
(["W", "w"], "W"),
(["D", "d", "days", "day", "Days", "Day"], "D"),
(
["m", "minute", "min", "minutes", "t", "Minute", "Min", "Minutes", "T"],
"unit, np_unit",
[(value, "W") for value in ["W", "w"]]
+ [(value, "D") for value in ["D", "d", "days", "day", "Days", "Day"]]
+ [
(value, "m")
for value in [
"m",
),
(["s", "seconds", "sec", "second", "S", "Seconds", "Sec", "Second"], "s"),
(
[
"ms",
"milliseconds",
"millisecond",
"milli",
"millis",
"l",
"MS",
"Milliseconds",
"Millisecond",
"Milli",
"Millis",
"L",
],
"minute",
"min",
"minutes",
"t",
"Minute",
"Min",
"Minutes",
"T",
]
]
+ [
(value, "s")
for value in [
"s",
"seconds",
"sec",
"second",
"S",
"Seconds",
"Sec",
"Second",
]
]
+ [
(value, "ms")
for value in [
"ms",
),
(
[
"us",
"microseconds",
"microsecond",
"micro",
"micros",
"u",
"US",
"Microseconds",
"Microsecond",
"Micro",
"Micros",
"U",
],
"milliseconds",
"millisecond",
"milli",
"millis",
"l",
"MS",
"Milliseconds",
"Millisecond",
"Milli",
"Millis",
"L",
]
]
+ [
(value, "us")
for value in [
"us",
),
(
[
"ns",
"nanoseconds",
"nanosecond",
"nano",
"nanos",
"n",
"NS",
"Nanoseconds",
"Nanosecond",
"Nano",
"Nanos",
"N",
],
"microseconds",
"microsecond",
"micro",
"micros",
"u",
"US",
"Microseconds",
"Microsecond",
"Micro",
"Micros",
"U",
]
]
+ [
(value, "ns")
for value in [
"ns",
),
"nanoseconds",
"nanosecond",
"nano",
"nanos",
"n",
"NS",
"Nanoseconds",
"Nanosecond",
"Nano",
"Nanos",
"N",
]
],
)
@pytest.mark.parametrize("wrapper", [np.array, list, pd.Index])
def test_unit_parser(self, units, np_unit, wrapper):
def test_unit_parser(self, unit, np_unit, wrapper):
# validate all units, GH 6855, GH 21762
for unit in units:
# array-likes
expected = TimedeltaIndex(
[np.timedelta64(i, np_unit) for i in np.arange(5).tolist()]
)
result = to_timedelta(wrapper(range(5)), unit=unit)
tm.assert_index_equal(result, expected)
result = TimedeltaIndex(wrapper(range(5)), unit=unit)
tm.assert_index_equal(result, expected)

if unit == "M":
# M is treated as minutes in string repr
expected = TimedeltaIndex(
[np.timedelta64(i, "m") for i in np.arange(5).tolist()]
)

str_repr = [f"{x}{unit}" for x in np.arange(5)]
result = to_timedelta(wrapper(str_repr))
tm.assert_index_equal(result, expected)
result = TimedeltaIndex(wrapper(str_repr))
tm.assert_index_equal(result, expected)

# scalar
expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]"))

result = to_timedelta(2, unit=unit)
assert result == expected
result = Timedelta(2, unit=unit)
assert result == expected

if unit == "M":
expected = Timedelta(np.timedelta64(2, "m").astype("timedelta64[ns]"))

result = to_timedelta(f"2{unit}")
assert result == expected
result = Timedelta(f"2{unit}")
assert result == expected
# array-likes
expected = TimedeltaIndex(
[np.timedelta64(i, np_unit) for i in np.arange(5).tolist()]
)
result = to_timedelta(wrapper(range(5)), unit=unit)
tm.assert_index_equal(result, expected)
result = TimedeltaIndex(wrapper(range(5)), unit=unit)
tm.assert_index_equal(result, expected)

str_repr = [f"{x}{unit}" for x in np.arange(5)]
result = to_timedelta(wrapper(str_repr))
tm.assert_index_equal(result, expected)
result = to_timedelta(wrapper(str_repr))
tm.assert_index_equal(result, expected)

# scalar
expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]"))
result = to_timedelta(2, unit=unit)
assert result == expected
result = Timedelta(2, unit=unit)
assert result == expected

result = to_timedelta(f"2{unit}")
assert result == expected
result = Timedelta(f"2{unit}")
assert result == expected

@pytest.mark.parametrize("unit", ["Y", "y", "M"])
def test_unit_m_y_raises(self, unit):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/methods/test_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_shift_always_copy(self, ser, shift_size):
# GH22397
assert ser.shift(shift_size) is not ser

@pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1M")])
@pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1min")])
def test_datetime_shift_always_copy(self, move_by_freq):
# GH#22397
ser = Series(range(5), index=date_range("2017", periods=5))
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/tools/test_to_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,27 @@ def test_to_timedelta_invalid(self):
invalid_data, to_timedelta(invalid_data, errors="ignore")
)

@pytest.mark.parametrize(
"val, warning",
[
("1M", FutureWarning),
("1 M", FutureWarning),
("1Y", FutureWarning),
("1 Y", FutureWarning),
("1y", FutureWarning),
("1 y", FutureWarning),
("1m", None),
("1 m", None),
("1 day", None),
("2day", None),
],
)
def test_unambiguous_timedelta_values(self, val, warning):
# GH36666 Deprecate use of strings denoting units with 'M', 'Y', 'm' or 'y'
# in pd.to_timedelta
with tm.assert_produces_warning(warning, check_stacklevel=False):
to_timedelta(val)

def test_to_timedelta_via_apply(self):
# GH 5458
expected = Series([np.timedelta64(1, "s")])
Expand Down