Skip to content

Commit 0f9adcd

Browse files
authored
BUG: Series([Timestamp, int], dtype=m8ns) dropping nanoseconds (#40100)
1 parent 3bfbb54 commit 0f9adcd

File tree

7 files changed

+59
-20
lines changed

7 files changed

+59
-20
lines changed

pandas/_libs/tslib.pyx

+10-5
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,8 @@ cpdef array_to_datetime(
376376
bint dayfirst=False,
377377
bint yearfirst=False,
378378
bint utc=False,
379-
bint require_iso8601=False
379+
bint require_iso8601=False,
380+
bint allow_mixed=False,
380381
):
381382
"""
382383
Converts a 1D array of date-like values to a numpy array of either:
@@ -405,6 +406,8 @@ cpdef array_to_datetime(
405406
indicator whether the dates should be UTC
406407
require_iso8601 : bool, default False
407408
indicator whether the datetime string should be iso8601
409+
allow_mixed : bool, default False
410+
Whether to allow mixed datetimes and integers.
408411
409412
Returns
410413
-------
@@ -597,7 +600,7 @@ cpdef array_to_datetime(
597600
return ignore_errors_out_of_bounds_fallback(values), tz_out
598601

599602
except TypeError:
600-
return array_to_datetime_object(values, errors, dayfirst, yearfirst)
603+
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
601604

602605
if seen_datetime and seen_integer:
603606
# we have mixed datetimes & integers
@@ -609,10 +612,12 @@ cpdef array_to_datetime(
609612
val = values[i]
610613
if is_integer_object(val) or is_float_object(val):
611614
result[i] = NPY_NAT
615+
elif allow_mixed:
616+
pass
612617
elif is_raise:
613618
raise ValueError("mixed datetimes and integers in passed array")
614619
else:
615-
return array_to_datetime_object(values, errors, dayfirst, yearfirst)
620+
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
616621

617622
if seen_datetime_offset and not utc_convert:
618623
# GH#17697
@@ -623,7 +628,7 @@ cpdef array_to_datetime(
623628
# (with individual dateutil.tzoffsets) are returned
624629
is_same_offsets = len(out_tzoffset_vals) == 1
625630
if not is_same_offsets:
626-
return array_to_datetime_object(values, errors, dayfirst, yearfirst)
631+
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
627632
else:
628633
tz_offset = out_tzoffset_vals.pop()
629634
tz_out = pytz.FixedOffset(tz_offset / 60.)
@@ -670,7 +675,7 @@ cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values):
670675

671676
@cython.wraparound(False)
672677
@cython.boundscheck(False)
673-
cdef array_to_datetime_object(
678+
cdef _array_to_datetime_object(
674679
ndarray[object] values,
675680
str errors,
676681
bint dayfirst=False,

pandas/core/construction.py

+2
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,8 @@ def _try_cast(
669669
subarr = arr
670670
else:
671671
subarr = maybe_cast_to_datetime(arr, dtype)
672+
if dtype is not None and dtype.kind == "M":
673+
return subarr
672674

673675
if not isinstance(subarr, ABCExtensionArray):
674676
subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy)

pandas/core/dtypes/cast.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -1572,10 +1572,24 @@ def maybe_cast_to_datetime(
15721572
value = to_timedelta(value, errors="raise")._values
15731573
except OutOfBoundsDatetime:
15741574
raise
1575-
except ValueError:
1575+
except ValueError as err:
15761576
# TODO(GH#40048): only catch dateutil's ParserError
15771577
# once we can reliably import it in all supported versions
1578-
pass
1578+
if "mixed datetimes and integers in passed array" in str(err):
1579+
# Retry through array_to_datetime with allow_mixed=True; otherwise
1580+
# we end up going through numpy, which will lose nanoseconds
1581+
# from Timestamps
1582+
try:
1583+
i8vals, tz = tslib.array_to_datetime(
1584+
value, allow_mixed=True
1585+
)
1586+
except ValueError:
1587+
pass
1588+
else:
1589+
from pandas.core.arrays import DatetimeArray
1590+
1591+
dta = DatetimeArray(i8vals).tz_localize(tz)
1592+
value = dta
15791593

15801594
# coerce datetimelike to object
15811595
elif is_datetime64_dtype(

pandas/core/internals/construction.py

+20-9
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,26 @@ def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj], copy: bool
230230
values = _prep_ndarray(values, copy=copy)
231231

232232
if dtype is not None and not is_dtype_equal(values.dtype, dtype):
233-
try:
234-
values = construct_1d_ndarray_preserving_na(
235-
values.ravel(), dtype=dtype, copy=False
236-
).reshape(values.shape)
237-
except Exception as orig:
238-
# e.g. ValueError when trying to cast object dtype to float64
239-
raise ValueError(
240-
f"failed to cast to '{dtype}' (Exception was: {orig})"
241-
) from orig
233+
shape = values.shape
234+
flat = values.ravel()
235+
236+
if not is_integer_dtype(dtype):
237+
# TODO: skipping integer_dtype is needed to keep the tests passing,
238+
# but it is not clear that this is correct
239+
# Note: we really only need _try_cast, but we stick to the exposed functions
240+
values = sanitize_array(
241+
flat, None, dtype=dtype, copy=copy, raise_cast_failure=True
242+
)
243+
else:
244+
try:
245+
values = construct_1d_ndarray_preserving_na(
246+
flat, dtype=dtype, copy=False
247+
)
248+
except Exception as err:
249+
# e.g. ValueError when trying to cast object dtype to float64
250+
msg = f"failed to cast to '{dtype}' (Exception was: {err})"
251+
raise ValueError(msg) from err
252+
values = values.reshape(shape)
242253

243254
# _prep_ndarray ensures that values.ndim == 2 at this point
244255
index, columns = _get_axes(

pandas/tests/base/test_constructors.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,7 @@ class TestConstruction:
124124
[
125125
Series,
126126
lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"],
127-
pytest.param(
128-
lambda x, **kwargs: DataFrame(x, **kwargs)[0], marks=pytest.mark.xfail
129-
),
127+
lambda x, **kwargs: DataFrame(x, **kwargs)[0],
130128
Index,
131129
],
132130
)

pandas/tests/frame/test_constructors.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,8 @@ def test_constructor_more(self, float_frame):
11001100

11011101
# can't cast
11021102
mat = np.array(["foo", "bar"], dtype=object).reshape(2, 1)
1103-
with pytest.raises(ValueError, match="cast"):
1103+
msg = "could not convert string to float: 'foo'"
1104+
with pytest.raises(ValueError, match=msg):
11041105
DataFrame(mat, index=[0, 1], columns=[0], dtype=float)
11051106

11061107
dm = DataFrame(DataFrame(float_frame._series))

pandas/tests/series/test_constructors.py

+8
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,14 @@ def test_constructor_datelike_coercion(self):
760760
result = df.loc["216"]
761761
assert result.dtype == object
762762

763+
def test_constructor_mixed_int_and_timestamp(self, frame_or_series):
764+
# specifically Timestamp with nanos, not datetimes
765+
objs = [Timestamp(9), 10, NaT.value]
766+
result = frame_or_series(objs, dtype="M8[ns]")
767+
768+
expected = frame_or_series([Timestamp(9), Timestamp(10), NaT])
769+
tm.assert_equal(result, expected)
770+
763771
def test_constructor_datetimes_with_nulls(self):
764772
# gh-15869
765773
for arr in [

0 commit comments

Comments
 (0)