Skip to content

Commit e13abaf

Browse files
authored
fix: allow extreme DATE values such as datetime.date(1, 1, 1) in load_gbq (#442)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-pandas/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #441 Towards #365 🦕
1 parent 928e47b commit e13abaf

File tree

5 files changed

+61
-12
lines changed

5 files changed

+61
-12
lines changed

ci/requirements-3.7-0.24.2.conda

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
codecov
22
coverage
3-
db-dtypes==0.3.0
3+
db-dtypes==0.3.1
44
fastavro
55
flake8
66
numpy==1.16.6

pandas_gbq/load.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,13 @@ def cast_dataframe_for_parquet(
9090
# Use extension dtype first so that it uses the correct equality operator.
9191
and db_dtypes.DateDtype() != dataframe[column_name].dtype
9292
):
93-
# Construct converted column manually, because I can't use
94-
# .astype() with DateDtype. With .astype(), I get the error:
95-
#
96-
# TypeError: Cannot interpret '<db_dtypes.DateDtype ...>' as a data type
97-
cast_column = pandas.Series(
98-
dataframe[column_name], dtype=db_dtypes.DateDtype()
93+
cast_column = dataframe[column_name].astype(
94+
dtype=db_dtypes.DateDtype(),
95+
# Return the original column if there was an error converting
96+
# to the dtype, such as if there is a date outside the
97+
# supported range.
98+
# https://github.com/googleapis/python-bigquery-pandas/issues/441
99+
errors="ignore",
99100
)
100101
elif column_type in {"NUMERIC", "DECIMAL", "BIGNUMERIC", "BIGDECIMAL"}:
101102
cast_column = dataframe[column_name].map(decimal.Decimal)

setup.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,16 @@
2323
release_status = "Development Status :: 4 - Beta"
2424
dependencies = [
2525
"setuptools",
26-
"db-dtypes >=0.3.0,<2.0.0",
27-
"numpy>=1.16.6",
28-
"pandas>=0.24.2",
26+
"db-dtypes >=0.3.1,<2.0.0",
27+
"numpy >=1.16.6",
28+
"pandas >=0.24.2",
2929
"pyarrow >=3.0.0, <7.0dev",
3030
"pydata-google-auth",
3131
"google-auth",
3232
"google-auth-oauthlib",
3333
# 2.4.* has a bug where waiting for the query can hang indefinitely.
3434
# https://github.com/pydata/pandas-gbq/issues/343
35-
"google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev,!=2.4.*",
35+
"google-cloud-bigquery[bqstorage,pandas] >=1.11.1,<3.0.0dev,!=2.4.*",
3636
]
3737
extras = {
3838
"tqdm": "tqdm>=4.23.0",

testing/constraints-3.7.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#
66
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
77
# Then this file should have foo==1.14.0
8-
db-dtypes==0.3.0
8+
db-dtypes==0.3.1
99
google-auth==1.4.1
1010
google-auth-oauthlib==0.0.1
1111
google-cloud-bigquery==1.11.1

tests/system/test_to_gbq.py

+48
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,54 @@ def test_series_round_trip(
188188
{"name": "num_col", "type": "NUMERIC"},
189189
],
190190
),
191+
pytest.param(
192+
*DataFrameRoundTripTestCase(
193+
input_df=pandas.DataFrame(
194+
{
195+
"row_num": [1, 2, 3],
196+
# DATE values outside the pandas range for timestamp
197+
# aren't supported by the db-dtypes package.
198+
# https://github.com/googleapis/python-bigquery-pandas/issues/441
199+
"date_col": [
200+
datetime.date(1, 1, 1),
201+
datetime.date(1970, 1, 1),
202+
datetime.date(9999, 12, 31),
203+
],
204+
# TODO: DATETIME/TIMESTAMP values outside of the range for
205+
# pandas timestamp require `date_as_object` parameter in
206+
# google-cloud-bigquery versions 1.x and 2.x.
207+
# https://github.com/googleapis/python-bigquery-pandas/issues/365
208+
# "datetime_col": [
209+
# datetime.datetime(1, 1, 1),
210+
# datetime.datetime(1970, 1, 1),
211+
# datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
212+
# ],
213+
# "timestamp_col": [
214+
# datetime.datetime(1, 1, 1, tzinfo=datetime.timezone.utc),
215+
# datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
216+
# datetime.datetime(
217+
# 9999,
218+
# 12,
219+
# 31,
220+
# 23,
221+
# 59,
222+
# 59,
223+
# 999999,
224+
# tzinfo=datetime.timezone.utc,
225+
# ),
226+
# ],
227+
},
228+
columns=["row_num", "date_col", "datetime_col", "timestamp_col"],
229+
),
230+
table_schema=[
231+
{"name": "row_num", "type": "INTEGER"},
232+
{"name": "date_col", "type": "DATE"},
233+
{"name": "datetime_col", "type": "DATETIME"},
234+
{"name": "timestamp_col", "type": "TIMESTAMP"},
235+
],
236+
),
237+
id="issue365-extreme-datetimes",
238+
),
191239
]
192240

193241

0 commit comments

Comments
 (0)