Skip to content

Commit ab783ab

Browse files
tswast authored and abdelmegahedgoogle committed
test: check extreme DATE/DATETIME values can be loaded from pandas DataFrame (googleapis#1078)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Towards #1076 🦕 (edit: moved to googleapis/python-db-dtypes-pandas#45)
1 parent ceb5946 commit ab783ab

File tree

1 file changed

+20
-21
lines changed

1 file changed

+20
-21
lines changed

tests/system/test_pandas.py

+20-21
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
268268
See: https://github.com/googleapis/google-cloud-python/issues/7370
269269
"""
270270
# Schema with all scalar types.
271-
scalars_schema = (
271+
table_schema = (
272272
bigquery.SchemaField("bool_col", "BOOLEAN"),
273273
bigquery.SchemaField("bytes_col", "BYTES"),
274274
bigquery.SchemaField("date_col", "DATE"),
@@ -283,15 +283,6 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
283283
bigquery.SchemaField("ts_col", "TIMESTAMP"),
284284
)
285285

286-
table_schema = scalars_schema + (
287-
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
288-
# mode mismatch. See:
289-
# https://issuetracker.google.com/133415569#comment3
290-
# bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
291-
# TODO: Support writing StructArrays to Parquet. See:
292-
# https://jira.apache.org/jira/browse/ARROW-2587
293-
# bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
294-
)
295286
num_rows = 100
296287
nulls = [None] * num_rows
297288
df_data = [
@@ -372,7 +363,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
372363
# See:
373364
# https://github.com/googleapis/python-bigquery/issues/61
374365
# https://issuetracker.google.com/issues/151765076
375-
scalars_schema = (
366+
table_schema = (
367+
bigquery.SchemaField("row_num", "INTEGER"),
376368
bigquery.SchemaField("bool_col", "BOOLEAN"),
377369
bigquery.SchemaField("bytes_col", "BYTES"),
378370
bigquery.SchemaField("date_col", "DATE"),
@@ -387,17 +379,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
387379
bigquery.SchemaField("ts_col", "TIMESTAMP"),
388380
)
389381

390-
table_schema = scalars_schema + (
391-
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
392-
# mode mismatch. See:
393-
# https://issuetracker.google.com/133415569#comment3
394-
# bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
395-
# TODO: Support writing StructArrays to Parquet. See:
396-
# https://jira.apache.org/jira/browse/ARROW-2587
397-
# bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
398-
)
399-
400382
df_data = [
383+
("row_num", [1, 2, 3]),
401384
("bool_col", [True, None, False]),
402385
("bytes_col", [b"abc", None, b"def"]),
403386
("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]),
@@ -464,6 +447,22 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
464447
assert tuple(table.schema) == table_schema
465448
assert table.num_rows == 3
466449

450+
result = bigquery_client.list_rows(table).to_dataframe()
451+
result.sort_values("row_num", inplace=True)
452+
453+
# Check that extreme DATE/DATETIME values are loaded correctly.
454+
# https://github.com/googleapis/python-bigquery/issues/1076
455+
assert result["date_col"][0] == datetime.date(1, 1, 1)
456+
assert result["date_col"][2] == datetime.date(9999, 12, 31)
457+
assert result["dt_col"][0] == datetime.datetime(1, 1, 1, 0, 0, 0)
458+
assert result["dt_col"][2] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
459+
assert result["ts_col"][0] == datetime.datetime(
460+
1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
461+
)
462+
assert result["ts_col"][2] == datetime.datetime(
463+
9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
464+
)
465+
467466

468467
def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id):
469468
"""Test that a DataFrame with struct datatype can be uploaded if a

0 commit comments

Comments
 (0)