Skip to content

Commit 8cebca6

Browse files
fix: Make group rules work on nullable grouping columns
1 parent 3bf3acf commit 8cebca6

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

dataframely/_rule.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,5 +126,5 @@ def _with_group_rules(lf: pl.LazyFrame, rules: dict[str, GroupRule]) -> pl.LazyF
126126
# preserves the order of the left data frame.
127127
result = lf
128128
for group_columns, frame in group_evaluations.items():
129-
result = result.join(frame, on=list(group_columns), how="left")
129+
result = result.join(frame, on=list(group_columns), how="left", nulls_equal=True)
130130
return result

tests/schema/test_validate.py

+10
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,13 @@ def test_success_multi_row_strip_cast(
116116
)
117117
assert_frame_equal(actual, expected)
118118
assert MySchema.is_valid(df, cast=True)
119+
120+
@pytest.mark.parametrize("df_type", [pl.DataFrame, pl.LazyFrame])
def test_group_rule_on_nulls(
    df_type: type[pl.DataFrame] | type[pl.LazyFrame],
):
    """Check that a group rule is still enforced when the grouping key is null.

    The frame is built once per parametrized frame type (eager and lazy) and
    must be rejected by both ``validate`` and ``is_valid``.
    """
    # Both rows carry a null in "a" but different values in "b". The schema is
    # violated because we have multiple "b" values for the same "a" value.
    # NOTE(review): presumably MyComplexSchema declares a group rule keyed on
    # "a" -- its definition is not part of this hunk; confirm there.
    columns = {"a": [None, None], "b": [1, 2]}
    frame = df_type(columns)

    # Validation has to raise ...
    with pytest.raises(RuleValidationError):
        MyComplexSchema.validate(frame, cast=True)

    # ... and the boolean check has to agree with it.
    assert not MyComplexSchema.is_valid(frame, cast=True)

0 commit comments

Comments
 (0)