Skip to content

Commit 847d85f

Browse files
authored
[python] Improve UX for extend_enumeration_values with non-Arrow types (#3939)
1 parent fc5d2c1 commit 847d85f

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

apis/python/src/tiledbsoma/_dataframe.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,12 @@ def extend_enumeration_values(
416416
# These assertions could be done in C++. However, it's easier here
417417
# to do the exception-type multiplexing, raising ValueError for one
418418
# thing, TileDBSOMAError for another.
419-
for column_name in values.keys():
419+
for column_name, values_for_column in values.items():
420+
if not isinstance(values_for_column, pa.Array):
421+
raise ValueError(
422+
f"value for column name '{column_name}' must be pyarrow.Array: got '{type(values_for_column)}'"
423+
)
424+
420425
# As with get_enumeration_values: we are trusting pyarrow to raise
421426
# KeyError, and raise it with a sufficiently clear error message,
422427
# when the column name is not present within the schema.
@@ -425,7 +430,7 @@ def extend_enumeration_values(
425430
raise KeyError(
426431
f"schema column name '{column_name}' is not of dictionary type"
427432
)
428-
if pa.types.is_dictionary(values[column_name].type):
433+
if pa.types.is_dictionary(values_for_column.type):
429434
raise ValueError(
430435
f"value column name '{column_name}' is of dictionary type: pass its dictionary array instead"
431436
)

apis/python/tests/test_dataframe.py

+8
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import somacore
1515
from numpy.testing import assert_array_equal
1616
from pandas.api.types import union_categoricals
17+
from typeguard import suppress_type_checks
1718

1819
import tiledbsoma as soma
1920

@@ -577,6 +578,13 @@ def test_extend_enumeration_values(tmp_path, extend_not_write, ordered):
577578
):
578579
sdf.extend_enumeration_values(xvalues)
579580

581+
# The values provided must be Arrow arrays. Our unit tests run with typeguard,
582+
# but our end users nominally do not -- so we have to ask typeguard to take
583+
# a breather here so we can test the UX our users will have.
584+
with suppress_type_checks():
585+
with pytest.raises(ValueError):
586+
sdf.extend_enumeration_values({"string_enum1": ["plain", "strings"]})
587+
580588
# The values provided must all be non-null
581589
for nvalues in [
582590
{"string_enum1": pa.array(["greetings", None])},

0 commit comments

Comments
 (0)