From ed9d7f0e97747baaac4d50455afcf263af32ae91 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Thu, 15 Aug 2019 15:38:20 +0900 Subject: [PATCH 01/14] Fix issue 27395 --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/core/dtypes/cast.py | 4 ++-- pandas/tests/dtypes/cast/test_infer_dtype.py | 12 ++++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 21f1fa7ddec1f..b975ab76fdcc8 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -32,7 +32,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :func:`to_datetime` where passing a timezone-naive :class:`DatetimeArray` or :class:`DatetimeIndex` and ``utc=True`` would incorrectly return a timezone-naive result (:issue:`27733`) -- +- Bug in :func:`maybe_cast_to_datetime` where converting a `np.datetime64` to `datetime64[D]` raise `TypeError` (:issue: `27395`) - - diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4bb1deffd9524..29b07b35a3614 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1026,7 +1026,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): ) if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.name in ("datetime64", "datetime64[ns]"): + if dtype.name in ("datetime64", "datetime64[ns]", "datetime64[D]"): if dtype.name == "datetime64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _NS_DTYPE @@ -1044,7 +1044,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.name in ("timedelta64", "timedelta64[ns]"): + if dtype.name in ("timedelta64", "timedelta64[ns]", "timedelta64[D]"): if dtype.name == "timedelta64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _TD_DTYPE diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 
602b2f26eaa4a..d24a3a852922b 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -5,6 +5,7 @@ from pandas.core.dtypes.cast import ( cast_scalar_to_array, + maybe_cast_to_datetime, infer_dtype_from_array, infer_dtype_from_scalar, ) @@ -169,3 +170,14 @@ def test_cast_scalar_to_array(obj, dtype): arr = cast_scalar_to_array(shape, obj, dtype=dtype) tm.assert_numpy_array_equal(arr, exp) + + +@pytest.mark.parametrize( + "obj,dtype", + [ + (np.datetime64("2017-01-01 01:00:00"), "datetime64[D]"), + (np.datetime64("2017-01-01 02:00:00"), "datetime64[D]"), + ], +) +def test_maybe_cast_to_datetime(obj, dtype): + maybe_cast_to_datetime(obj, dtype) From 49471e785bca57ddd44359c13fb2672de3e1da92 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Thu, 15 Aug 2019 17:41:44 +0900 Subject: [PATCH 02/14] Fix lint error --- pandas/tests/dtypes/cast/test_infer_dtype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index d24a3a852922b..a204242c5c98d 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -5,9 +5,9 @@ from pandas.core.dtypes.cast import ( cast_scalar_to_array, - maybe_cast_to_datetime, infer_dtype_from_array, infer_dtype_from_scalar, + maybe_cast_to_datetime, ) from pandas.core.dtypes.common import is_dtype_equal From 9efb16936b632956ba2b5db0af3e450418658eaa Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Thu, 15 Aug 2019 21:25:35 +0900 Subject: [PATCH 03/14] shoot numpy-dev future warning --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 29b07b35a3614..54c1bc01b5337 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1055,7 +1055,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): ) if is_scalar(value): - 
if value == iNaT or isna(value): + if value is iNaT or isna(value): value = iNaT else: value = np.array(value, copy=False) From 41df89dbaba561d554707cf5cbc174a494884113 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Thu, 15 Aug 2019 22:26:15 +0900 Subject: [PATCH 04/14] Reflect reviews - update doc - update dtype matching function - update test --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/core/dtypes/cast.py | 4 ++-- pandas/tests/indexing/test_loc.py | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index b975ab76fdcc8..808d911ca1e78 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -32,7 +32,6 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :func:`to_datetime` where passing a timezone-naive :class:`DatetimeArray` or :class:`DatetimeIndex` and ``utc=True`` would incorrectly return a timezone-naive result (:issue:`27733`) -- Bug in :func:`maybe_cast_to_datetime` where converting a `np.datetime64` to `datetime64[D]` raise `TypeError` (:issue: `27395`) - - @@ -85,6 +84,7 @@ Indexing - Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`) - Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`) - Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`). 
+- Fix assignment of column via `.loc` with numpy `non-ns datetime` type (:issue: `27395`) - Missing diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 54c1bc01b5337..80cb841ee1aec 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1026,7 +1026,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): ) if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.name in ("datetime64", "datetime64[ns]", "datetime64[D]"): + if dtype.kind == "M": if dtype.name == "datetime64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _NS_DTYPE @@ -1044,7 +1044,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.name in ("timedelta64", "timedelta64[ns]", "timedelta64[D]"): + if dtype.kind == "m": if dtype.name == "timedelta64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _TD_DTYPE diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index abe0cd86c90d7..b7c8eec582b73 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -9,6 +9,7 @@ import pandas as pd from pandas import DataFrame, Series, Timestamp, date_range from pandas.api.types import is_scalar +from pandas.core.dtypes.cast import maybe_cast_to_datetime from pandas.tests.indexing.common import Base from pandas.util import testing as tm @@ -690,6 +691,24 @@ def test_loc_setitem_consistency_slice_column_len(self): ) tm.assert_series_equal(df[("Respondent", "Duration")], expected) + @pytest.mark.parametrize( + "obj,dtype", + [ + (np.datetime64("2017-01-01 01:00:00"), "datetime64"), + (np.datetime64("2017-01-01 02:00:00"), "datetime64[ns]"), + (np.datetime64("2017-01-02 01:00:00"), "datetime64[Y]"), + (np.datetime64("2017-01-03 02:00:00"), "datetime64[M]"), + (np.datetime64("2017-01-04 02:00:00"), "datetime64[D]"), + (np.datetime64("2017-01-05 02:00:00"), "datetime64[h]"), + 
(np.datetime64("2017-01-06 02:10:00"), "datetime64[m]"), + (np.datetime64("2017-01-07 02:20:10"), "datetime64[s]"), + (np.datetime64("2017-01-08 02:40:20"), "datetime64[ms]"), + (np.datetime64("2017-01-09 02:50:30"), "datetime64[ns]"), + ], + ) + def test_maybe_cast_to_datetime(obj, dtype): + maybe_cast_to_datetime(obj, dtype) + def test_loc_setitem_frame(self): df = self.frame_labels From 6df540c745095cfacb1bfb1feb5451a72bb7246d Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Thu, 15 Aug 2019 23:13:05 +0900 Subject: [PATCH 05/14] Fix ci error --- pandas/tests/indexing/test_loc.py | 35 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b7c8eec582b73..88b2eeb5df5d6 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -9,7 +9,6 @@ import pandas as pd from pandas import DataFrame, Series, Timestamp, date_range from pandas.api.types import is_scalar -from pandas.core.dtypes.cast import maybe_cast_to_datetime from pandas.tests.indexing.common import Base from pandas.util import testing as tm @@ -692,22 +691,30 @@ def test_loc_setitem_consistency_slice_column_len(self): tm.assert_series_equal(df[("Respondent", "Duration")], expected) @pytest.mark.parametrize( - "obj,dtype", + "dtype", [ - (np.datetime64("2017-01-01 01:00:00"), "datetime64"), - (np.datetime64("2017-01-01 02:00:00"), "datetime64[ns]"), - (np.datetime64("2017-01-02 01:00:00"), "datetime64[Y]"), - (np.datetime64("2017-01-03 02:00:00"), "datetime64[M]"), - (np.datetime64("2017-01-04 02:00:00"), "datetime64[D]"), - (np.datetime64("2017-01-05 02:00:00"), "datetime64[h]"), - (np.datetime64("2017-01-06 02:10:00"), "datetime64[m]"), - (np.datetime64("2017-01-07 02:20:10"), "datetime64[s]"), - (np.datetime64("2017-01-08 02:40:20"), "datetime64[ms]"), - (np.datetime64("2017-01-09 02:50:30"), "datetime64[ns]"), + "datetime64", + "datetime64[ns]", + 
"datetime64[Y]", + "datetime64[M]", + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[ns]", ], ) - def test_maybe_cast_to_datetime(obj, dtype): - maybe_cast_to_datetime(obj, dtype) + def test_loc_assign_non_ns_datetime(self, dtype): + df = pd.DataFrame( + { + "timestamp": [ + np.datetime64("2017-01-01 01:11:20"), + np.datetime64("2017-01-01 02:01:30"), + ] + } + ) + df.loc[:, "day"] = df.loc[:, "timestamp"].values.astype(dtype) def test_loc_setitem_frame(self): df = self.frame_labels From 98a5061bbcc04bb6114cbf1bf69efe15f0e66432 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Fri, 16 Aug 2019 13:32:29 +0900 Subject: [PATCH 06/14] Fix dtype match except [ps] --- pandas/core/dtypes/cast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 80cb841ee1aec..bd079a7bb6ff8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1026,7 +1026,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): ) if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.kind == "M": + if (dtype.kind == "M") and (dtype.name != "datetime64[ps]"): if dtype.name == "datetime64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _NS_DTYPE @@ -1044,7 +1044,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.kind == "m": + if (dtype.kind == "m") and (dtype.name != "timedelta64[ps]"): if dtype.name == "timedelta64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _TD_DTYPE From cf18fb9ad473107d87dba56155898249071c0186 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Wed, 21 Aug 2019 23:51:10 +0900 Subject: [PATCH 07/14] Reflect reviews - update docs - remove previous tests - update tests - revert to the changes that made to pass the ci tests --- doc/source/whatsnew/v0.25.1.rst | 2 +- 
pandas/core/dtypes/cast.py | 6 +- pandas/tests/dtypes/cast/test_infer_dtype.py | 12 ---- pandas/tests/indexing/test_loc.py | 70 ++++++++++++++------ 4 files changed, 54 insertions(+), 36 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 808d911ca1e78..4e063a28335f8 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -84,7 +84,7 @@ Indexing - Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`) - Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`) - Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`). -- Fix assignment of column via `.loc` with numpy `non-ns datetime` type (:issue: `27395`) +- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`) - Missing diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index bd079a7bb6ff8..f01b47ba5bc51 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1026,7 +1026,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): ) if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if (dtype.kind == "M") and (dtype.name != "datetime64[ps]"): + if dtype.kind == "M": if dtype.name == "datetime64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _NS_DTYPE @@ -1044,7 +1044,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if (dtype.kind == "m") and (dtype.name != "timedelta64[ps]"): + if dtype.kind == "m": if dtype.name == "timedelta64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _TD_DTYPE @@ -1055,7 +1055,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): ) if 
is_scalar(value): - if value is iNaT or isna(value): + if value == iNaT or isna(value): value = iNaT else: value = np.array(value, copy=False) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index a204242c5c98d..602b2f26eaa4a 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -7,7 +7,6 @@ cast_scalar_to_array, infer_dtype_from_array, infer_dtype_from_scalar, - maybe_cast_to_datetime, ) from pandas.core.dtypes.common import is_dtype_equal @@ -170,14 +169,3 @@ def test_cast_scalar_to_array(obj, dtype): arr = cast_scalar_to_array(shape, obj, dtype=dtype) tm.assert_numpy_array_equal(arr, exp) - - -@pytest.mark.parametrize( - "obj,dtype", - [ - (np.datetime64("2017-01-01 01:00:00"), "datetime64[D]"), - (np.datetime64("2017-01-01 02:00:00"), "datetime64[D]"), - ], -) -def test_maybe_cast_to_datetime(obj, dtype): - maybe_cast_to_datetime(obj, dtype) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 88b2eeb5df5d6..97e894738ac96 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -690,31 +690,61 @@ def test_loc_setitem_consistency_slice_column_len(self): ) tm.assert_series_equal(df[("Respondent", "Duration")], expected) - @pytest.mark.parametrize( - "dtype", - [ - "datetime64", - "datetime64[ns]", - "datetime64[Y]", - "datetime64[M]", - "datetime64[D]", - "datetime64[h]", - "datetime64[m]", - "datetime64[s]", - "datetime64[ms]", - "datetime64[ns]", - ], - ) - def test_loc_assign_non_ns_datetime(self, dtype): - df = pd.DataFrame( + def test_loc_assign_non_ns_datetime(self): + df = DataFrame( { "timestamp": [ - np.datetime64("2017-01-01 01:11:20"), - np.datetime64("2017-01-01 02:01:30"), + np.datetime64("2017-02-11 12:41:29"), + np.datetime64("1991-11-07 04:22:37"), ] } ) - df.loc[:, "day"] = df.loc[:, "timestamp"].values.astype(dtype) + + df.loc[:, "year"] = df.loc[:, 
"timestamp"].values.astype("datetime64[Y]") + df.loc[:, "month"] = df.loc[:, "timestamp"].values.astype("datetime64[M]") + df.loc[:, "day"] = df.loc[:, "timestamp"].values.astype("datetime64[D]") + df.loc[:, "hour"] = df.loc[:, "timestamp"].values.astype("datetime64[h]") + df.loc[:, "minute"] = df.loc[:, "timestamp"].values.astype("datetime64[m]") + df.loc[:, "second"] = df.loc[:, "timestamp"].values.astype("datetime64[s]") + df.loc[:, "millisecond"] = df.loc[:, "timestamp"].values.astype( + "datetime64[ms]" + ) + result = df.loc[ + :, ["year", "month", "day", "hour", "minute", "second", "millisecond"] + ] + expected = DataFrame( + { + "year": [ + np.datetime64("2017-02-11 12:41:29", "Y"), + np.datetime64("1991-11-07 04:22:37", "Y"), + ], + "month": [ + np.datetime64("2017-02-11 12:41:29", "M"), + np.datetime64("1991-11-07 04:22:37", "M"), + ], + "day": [ + np.datetime64("2017-02-11 12:41:29", "D"), + np.datetime64("1991-11-07 04:22:37", "D"), + ], + "hour": [ + np.datetime64("2017-02-11 12:41:29", "h"), + np.datetime64("1991-11-07 04:22:37", "h"), + ], + "minute": [ + np.datetime64("2017-02-11 12:41:29", "m"), + np.datetime64("1991-11-07 04:22:37", "m"), + ], + "second": [ + np.datetime64("2017-02-11 12:41:29", "s"), + np.datetime64("1991-11-07 04:22:37", "s"), + ], + "millisecond": [ + np.datetime64("2017-02-11 12:41:29", "ms"), + np.datetime64("1991-11-07 04:22:37", "ms"), + ], + } + ) + tm.assert_frame_equal(result, expected) def test_loc_setitem_frame(self): df = self.frame_labels From 93c2f85e1b159f1e75b2c1f73a9311e0fc6c1274 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Thu, 29 Aug 2019 21:43:25 +0900 Subject: [PATCH 08/14] Fix CI error --- pandas/tests/indexing/test_loc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 97e894738ac96..7e4161eb9b792 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -743,6 +743,8 @@ def 
test_loc_assign_non_ns_datetime(self): np.datetime64("1991-11-07 04:22:37", "ms"), ], } + ).reindex( + columns=["year", "month", "day", "hour", "minute", "second", "millisecond"] ) tm.assert_frame_equal(result, expected) From 35009b75f1ebe49e8e981a1698af474d35f69919 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Sat, 31 Aug 2019 23:03:55 +0900 Subject: [PATCH 09/14] Fix CI error --- pandas/core/dtypes/cast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f01b47ba5bc51..8d48548f4def7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1026,7 +1026,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): ) if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.kind == "M": + if dtype <= np.dtype("M8[ns]"): if dtype.name == "datetime64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _NS_DTYPE @@ -1044,7 +1044,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.kind == "m": + if dtype <= np.dtype("M8[ns]"): if dtype.name == "timedelta64": raise ValueError(msg.format(dtype=dtype.name)) dtype = _TD_DTYPE From 6a48ca6847277014664a81e00c1b7b984e7e494d Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Sat, 31 Aug 2019 23:53:10 +0900 Subject: [PATCH 10/14] Fix CI error --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8d48548f4def7..156757d617bd9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1044,7 +1044,7 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype <= np.dtype("M8[ns]"): + if dtype <= np.dtype("m8[ns]"): if dtype.name == "timedelta64": raise ValueError(msg.format(dtype=dtype.name)) dtype = 
_TD_DTYPE From d64a2d8bde446ca6b0db984b044f482cb03c4dda Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Thu, 12 Sep 2019 21:46:00 +0900 Subject: [PATCH 11/14] Update whatsnew --- doc/source/whatsnew/v0.25.2.rst | 1 - doc/source/whatsnew/v1.0.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 9f3984a0ac13b..de411ef63680a 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -49,7 +49,6 @@ Interval Indexing ^^^^^^^^ -- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`) - - - diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ab821e13f99cc..8a0d5830ad0de 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -164,6 +164,7 @@ Indexing - Bug in assignment using a reverse slicer (:issue:`26939`) - Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`) +- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`) Missing ^^^^^^^ From 86fd1c1f40a1115902de61c74c5f1c613c229e96 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Thu, 12 Sep 2019 23:09:09 +0900 Subject: [PATCH 12/14] Add comments --- pandas/core/dtypes/cast.py | 6 ++++ pandas/tests/indexing/test_loc.py | 55 +++++-------------------------- 2 files changed, 15 insertions(+), 46 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 156757d617bd9..2920d9e6b2f45 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1026,6 +1026,9 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): ) if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): + + # pandas supports dtype whose granularity is less than [ns] + # e.g., [ps], [fs], [as] if dtype <= np.dtype("M8[ns]"): if dtype.name == "datetime64": raise ValueError(msg.format(dtype=dtype.name)) @@ 
-1044,6 +1047,9 @@ def maybe_cast_to_datetime(value, dtype, errors="raise"): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): + + # pandas supports dtype whose granularity is less than [ns] + # e.g., [ps], [fs], [as] if dtype <= np.dtype("m8[ns]"): if dtype.name == "timedelta64": raise ValueError(msg.format(dtype=dtype.name)) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 31d1cef06dbe6..18e5393c2022b 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -690,7 +690,10 @@ def test_loc_setitem_consistency_slice_column_len(self): ) tm.assert_series_equal(df[("Respondent", "Duration")], expected) - def test_loc_assign_non_ns_datetime(self): + @pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"]) + def test_loc_assign_non_ns_datetime(self, unit): + # GH 27395, non-ns dtype assignment via .loc should work + # and return the same result when using simple assignment df = DataFrame( { "timestamp": [ @@ -700,53 +703,13 @@ def test_loc_assign_non_ns_datetime(self): } ) - df.loc[:, "year"] = df.loc[:, "timestamp"].values.astype("datetime64[Y]") - df.loc[:, "month"] = df.loc[:, "timestamp"].values.astype("datetime64[M]") - df.loc[:, "day"] = df.loc[:, "timestamp"].values.astype("datetime64[D]") - df.loc[:, "hour"] = df.loc[:, "timestamp"].values.astype("datetime64[h]") - df.loc[:, "minute"] = df.loc[:, "timestamp"].values.astype("datetime64[m]") - df.loc[:, "second"] = df.loc[:, "timestamp"].values.astype("datetime64[s]") - df.loc[:, "millisecond"] = df.loc[:, "timestamp"].values.astype( - "datetime64[ms]" + df.loc[:, unit] = df.loc[:, "timestamp"].values.astype( + "datetime64[{unit}]".format(unit=unit) ) - result = df.loc[ - :, ["year", "month", "day", "hour", "minute", "second", "millisecond"] - ] - expected = DataFrame( - { - "year": [ - np.datetime64("2017-02-11 12:41:29", "Y"), - np.datetime64("1991-11-07 04:22:37", "Y"), - ], - "month": [ - 
np.datetime64("2017-02-11 12:41:29", "M"), - np.datetime64("1991-11-07 04:22:37", "M"), - ], - "day": [ - np.datetime64("2017-02-11 12:41:29", "D"), - np.datetime64("1991-11-07 04:22:37", "D"), - ], - "hour": [ - np.datetime64("2017-02-11 12:41:29", "h"), - np.datetime64("1991-11-07 04:22:37", "h"), - ], - "minute": [ - np.datetime64("2017-02-11 12:41:29", "m"), - np.datetime64("1991-11-07 04:22:37", "m"), - ], - "second": [ - np.datetime64("2017-02-11 12:41:29", "s"), - np.datetime64("1991-11-07 04:22:37", "s"), - ], - "millisecond": [ - np.datetime64("2017-02-11 12:41:29", "ms"), - np.datetime64("1991-11-07 04:22:37", "ms"), - ], - } - ).reindex( - columns=["year", "month", "day", "hour", "minute", "second", "millisecond"] + df[:, "expected"] = df.loc[:, "timestamp"].values.astype( + "datetime64[{unit}]".format(unit=unit) ) - tm.assert_frame_equal(result, expected) + tm.assert_series_equal(df[:, unit], df[:, "expected"]) def test_loc_setitem_frame(self): df = self.frame_labels From 48e454a1c85811bf801ce1df20a0634dda657755 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Fri, 13 Sep 2019 11:32:26 +0900 Subject: [PATCH 13/14] Fix CI error --- pandas/tests/indexing/test_loc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 18e5393c2022b..32c58b3f623ef 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -706,10 +706,11 @@ def test_loc_assign_non_ns_datetime(self, unit): df.loc[:, unit] = df.loc[:, "timestamp"].values.astype( "datetime64[{unit}]".format(unit=unit) ) - df[:, "expected"] = df.loc[:, "timestamp"].values.astype( + df["expected"] = df.loc[:, "timestamp"].values.astype( "datetime64[{unit}]".format(unit=unit) ) - tm.assert_series_equal(df[:, unit], df[:, "expected"]) + expected = Series(df.loc[:, "expected"], name="day") + tm.assert_series_equal(df.loc[:, unit], expected) def test_loc_setitem_frame(self): df = 
self.frame_labels From c8eb91f46a1166ad6c81a4f778cea0e4090059e5 Mon Sep 17 00:00:00 2001 From: inmoonlight Date: Fri, 13 Sep 2019 20:55:05 +0900 Subject: [PATCH 14/14] Fix CI error --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 32c58b3f623ef..35291efecd1ac 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -709,7 +709,7 @@ def test_loc_assign_non_ns_datetime(self, unit): df["expected"] = df.loc[:, "timestamp"].values.astype( "datetime64[{unit}]".format(unit=unit) ) - expected = Series(df.loc[:, "expected"], name="day") + expected = Series(df.loc[:, "expected"], name=unit) tm.assert_series_equal(df.loc[:, unit], expected) def test_loc_setitem_frame(self):