Skip to content

BUG: Series([Timestamp, int], dtype=m8ns) dropping nanoseconds #40100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 2 commits merged on Feb 27, 2021
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,8 @@ cpdef array_to_datetime(
bint dayfirst=False,
bint yearfirst=False,
bint utc=False,
bint require_iso8601=False
bint require_iso8601=False,
bint allow_mixed=False,
):
"""
Converts a 1D array of date-like values to a numpy array of either:
Expand Down Expand Up @@ -405,6 +406,8 @@ cpdef array_to_datetime(
Whether the dates should be UTC.
require_iso8601 : bool, default False
Whether the datetime string should be ISO 8601.
allow_mixed : bool, default False
Whether to allow mixed datetimes and integers.

Returns
-------
Expand Down Expand Up @@ -597,7 +600,7 @@ cpdef array_to_datetime(
return ignore_errors_out_of_bounds_fallback(values), tz_out

except TypeError:
return array_to_datetime_object(values, errors, dayfirst, yearfirst)
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)

if seen_datetime and seen_integer:
# we have mixed datetimes & integers
Expand All @@ -609,10 +612,12 @@ cpdef array_to_datetime(
val = values[i]
if is_integer_object(val) or is_float_object(val):
result[i] = NPY_NAT
elif allow_mixed:
pass
elif is_raise:
raise ValueError("mixed datetimes and integers in passed array")
else:
return array_to_datetime_object(values, errors, dayfirst, yearfirst)
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)

if seen_datetime_offset and not utc_convert:
# GH#17697
Expand All @@ -623,7 +628,7 @@ cpdef array_to_datetime(
# (with individual dateutil.tzoffsets) are returned
is_same_offsets = len(out_tzoffset_vals) == 1
if not is_same_offsets:
return array_to_datetime_object(values, errors, dayfirst, yearfirst)
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
else:
tz_offset = out_tzoffset_vals.pop()
tz_out = pytz.FixedOffset(tz_offset / 60.)
Expand Down Expand Up @@ -670,7 +675,7 @@ cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values):

@cython.wraparound(False)
@cython.boundscheck(False)
cdef array_to_datetime_object(
cdef _array_to_datetime_object(
ndarray[object] values,
str errors,
bint dayfirst=False,
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,8 @@ def _try_cast(
subarr = arr
else:
subarr = maybe_cast_to_datetime(arr, dtype)
if dtype is not None and dtype.kind == "M":
return subarr

if not isinstance(subarr, ABCExtensionArray):
subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy)
Expand Down
18 changes: 16 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,10 +1583,24 @@ def maybe_cast_to_datetime(
value = to_timedelta(value, errors="raise")._values
except OutOfBoundsDatetime:
raise
except ValueError:
except ValueError as err:
# TODO(GH#40048): only catch dateutil's ParserError
# once we can reliably import it in all supported versions
pass
if "mixed datetimes and integers in passed array" in str(err):
# We need to catch this in array_to_datetime, otherwise
# we end up going through numpy which will lose nanoseconds
# from Timestamps
try:
i8vals, tz = tslib.array_to_datetime(
value, allow_mixed=True
)
except ValueError:
pass
else:
from pandas.core.arrays import DatetimeArray

dta = DatetimeArray(i8vals).tz_localize(tz)
value = dta

# coerce datetimelike to object
elif is_datetime64_dtype(
Expand Down
29 changes: 20 additions & 9 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,15 +232,26 @@ def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj], copy: bool
values = _prep_ndarray(values, copy=copy)

if dtype is not None and not is_dtype_equal(values.dtype, dtype):
try:
values = construct_1d_ndarray_preserving_na(
values.ravel(), dtype=dtype, copy=False
).reshape(values.shape)
except Exception as orig:
# e.g. ValueError when trying to cast object dtype to float64
raise ValueError(
f"failed to cast to '{dtype}' (Exception was: {orig})"
) from orig
shape = values.shape
flat = values.ravel()

if not is_integer_dtype(dtype):
# TODO: skipping integer_dtype is needed to keep the tests passing;
# it is not clear that this is correct.
# Note: we really only need _try_cast, but we stick to exposed funcs.
values = sanitize_array(
flat, None, dtype=dtype, copy=copy, raise_cast_failure=True
)
else:
try:
values = construct_1d_ndarray_preserving_na(
flat, dtype=dtype, copy=False
)
except Exception as err:
# e.g. ValueError when trying to cast object dtype to float64
msg = f"failed to cast to '{dtype}' (Exception was: {err})"
raise ValueError(msg) from err
values = values.reshape(shape)

# _prep_ndarray ensures that values.ndim == 2 at this point
index, columns = _get_axes(
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/base/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,7 @@ class TestConstruction:
[
Series,
lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"],
pytest.param(
lambda x, **kwargs: DataFrame(x, **kwargs)[0], marks=pytest.mark.xfail
),
lambda x, **kwargs: DataFrame(x, **kwargs)[0],
Index,
],
)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,7 +1100,8 @@ def test_constructor_more(self, float_frame):

# can't cast
mat = np.array(["foo", "bar"], dtype=object).reshape(2, 1)
with pytest.raises(ValueError, match="cast"):
msg = "could not convert string to float: 'foo'"
with pytest.raises(ValueError, match=msg):
DataFrame(mat, index=[0, 1], columns=[0], dtype=float)

dm = DataFrame(DataFrame(float_frame._series))
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,14 @@ def test_constructor_datelike_coercion(self):
result = df.loc["216"]
assert result.dtype == object

def test_constructor_mixed_int_and_timestamp(self, frame_or_series):
    """
    Check construction with dtype="M8[ns]" from a Timestamp mixed with ints.

    The result must equal the object built from the corresponding
    Timestamp/NaT values.
    """
    # The first entry is specifically a Timestamp with nanos, not a
    # datetime; the remaining entries are a plain integer and NaT.value.
    mixed = [Timestamp(9), 10, NaT.value]
    datetimelike = [Timestamp(9), Timestamp(10), NaT]

    expected = frame_or_series(datetimelike)
    result = frame_or_series(mixed, dtype="M8[ns]")

    tm.assert_equal(result, expected)

def test_constructor_datetimes_with_nulls(self):
# gh-15869
for arr in [
Expand Down