Skip to content

Commit d2f32df

Browse files
authored
fix: ensure BIGNUMERIC type is used if scale > 9 in Decimal values (#844)
1 parent b32a9c9 commit d2f32df

File tree

2 files changed

+33
-0
lines changed

2 files changed

+33
-0
lines changed

pandas_gbq/schema/pyarrow_to_bigquery.py

+7
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,13 @@ def arrow_type_to_bigquery_field(
7272
return schema.SchemaField(name, "TIMESTAMP")
7373

7474
detected_type = _ARROW_SCALAR_IDS_TO_BQ.get(type_.id, None)
75+
76+
# We need a special case for values that might fit in Arrow decimal128 but
77+
# not with the scale/precision that is used in BigQuery's NUMERIC type.
78+
# See: https://github.com/googleapis/python-bigquery/issues/1650
79+
if detected_type == "NUMERIC" and type_.scale > 9:
80+
detected_type = "BIGNUMERIC"
81+
7582
if detected_type is not None:
7683
return schema.SchemaField(name, detected_type)
7784

tests/unit/schema/test_pandas_to_bigquery.py

+26
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import collections
66
import datetime
7+
import decimal
78
import operator
89

910
from google.cloud.bigquery import schema
@@ -46,6 +47,29 @@ def test_dataframe_to_bigquery_fields_w_named_index(module_under_test):
4647
),
4748
],
4849
),
50+
# Need to fall back to Arrow to avoid data loss and disambiguate
51+
# NUMERIC from BIGNUMERIC. We don't want to pick too small of a
52+
# type and lose precision. See:
53+
# https://github.com/googleapis/python-bigquery/issues/1650
54+
#
55+
(
56+
"bignumeric_column",
57+
[
58+
# Start with a lower precision Decimal to make sure we
59+
# aren't trying to determine the type from just one value.
60+
decimal.Decimal("1.25"),
61+
decimal.Decimal("0.1234567891"),
62+
],
63+
),
64+
(
65+
"numeric_column",
66+
[
67+
# Minimum value greater than 0 that can be handled: 1e-9
68+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric_types
69+
decimal.Decimal("0.000000001"),
70+
decimal.Decimal("-0.000000001"),
71+
],
72+
),
4973
]
5074
)
5175
dataframe = pandas.DataFrame(df_data).set_index("str_index", drop=True)
@@ -64,6 +88,8 @@ def test_dataframe_to_bigquery_fields_w_named_index(module_under_test):
6488
schema.SchemaField("boolean_column", "BOOLEAN", "NULLABLE"),
6589
schema.SchemaField("datetime_column", "DATETIME", "NULLABLE"),
6690
schema.SchemaField("timestamp_column", "TIMESTAMP", "NULLABLE"),
91+
schema.SchemaField("bignumeric_column", "BIGNUMERIC", "NULLABLE"),
92+
schema.SchemaField("numeric_column", "NUMERIC", "NULLABLE"),
6793
)
6894
assert returned_schema == expected_schema
6995

0 commit comments

Comments
 (0)