Skip to content

feat: Allow nullable primary keys #14

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions dataframely/columns/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class Column(ABC):
def __init__(
self,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
check: Callable[[pl.Expr], pl.Expr] | None = None,
alias: str | None = None,
Expand All @@ -37,8 +37,10 @@ def __init__(
"""
Args:
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
check: A custom check to run for this column. Must return a non-aggregated
boolean expression.
alias: An overwrite for this column's name which allows for using a column
Expand All @@ -48,7 +50,7 @@ def __init__(
internally sets the alias to the column's name in the parent schema.
metadata: A dictionary of metadata to attach to the column.
"""
self.nullable = nullable and not primary_key
self.nullable = nullable if nullable is not None else not primary_key
self.primary_key = primary_key
self.check = check
self.alias = alias
Expand Down
24 changes: 16 additions & 8 deletions dataframely/columns/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class Date(OrdinalMixin[dt.date], Column):
def __init__(
self,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
min: dt.date | None = None,
min_exclusive: dt.date | None = None,
Expand All @@ -46,8 +46,10 @@ def __init__(
"""
Args:
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
min: The minimum date for dates in this column (inclusive).
min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
is specified and vice versa.
Expand Down Expand Up @@ -142,7 +144,7 @@ class Time(OrdinalMixin[dt.time], Column):
def __init__(
self,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
min: dt.time | None = None,
min_exclusive: dt.time | None = None,
Expand All @@ -156,8 +158,10 @@ def __init__(
"""
Args:
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
min: The minimum time for times in this column (inclusive).
min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
is specified and vice versa.
Expand Down Expand Up @@ -258,7 +262,7 @@ class Datetime(OrdinalMixin[dt.datetime], Column):
def __init__(
self,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
min: dt.datetime | None = None,
min_exclusive: dt.datetime | None = None,
Expand All @@ -272,8 +276,10 @@ def __init__(
"""
Args:
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
min: The minimum datetime for datetimes in this column (inclusive).
min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
is specified and vice versa.
Expand Down Expand Up @@ -364,7 +370,7 @@ class Duration(OrdinalMixin[dt.timedelta], Column):
def __init__(
self,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
min: dt.timedelta | None = None,
min_exclusive: dt.timedelta | None = None,
Expand All @@ -378,8 +384,10 @@ def __init__(
"""
Args:
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
min: The minimum duration for durations in this column (inclusive).
min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
is specified and vice versa.
Expand Down
6 changes: 4 additions & 2 deletions dataframely/columns/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(
precision: int | None = None,
scale: int = 0,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
min: decimal.Decimal | None = None,
min_exclusive: decimal.Decimal | None = None,
Expand All @@ -42,8 +42,10 @@ def __init__(
precision: Maximum number of digits in each number.
scale: Number of digits to the right of the decimal point in each number.
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
min: The minimum value for decimals in this column (inclusive).
min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
is specified and vice versa.
Expand Down
6 changes: 4 additions & 2 deletions dataframely/columns/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(
self,
categories: Sequence[str],
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
check: Callable[[pl.Expr], pl.Expr] | None = None,
alias: str | None = None,
Expand All @@ -32,8 +32,10 @@ def __init__(
Args:
categories: The list of valid categories for the enum.
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
check: A custom check to run for this column. Must return a non-aggregated
boolean expression.
alias: An overwrite for this column's name which allows for using a column
Expand Down
6 changes: 4 additions & 2 deletions dataframely/columns/float.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class _BaseFloat(OrdinalMixin[float], Column):
def __init__(
self,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
allow_inf_nan: bool = False,
min: float | None = None,
Expand All @@ -40,8 +40,10 @@ def __init__(
"""
Args:
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
allow_inf_nan: Whether this column may contain NaN and infinity values.
min: The minimum value for floats in this column (inclusive).
min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
Expand Down
6 changes: 4 additions & 2 deletions dataframely/columns/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class _BaseInteger(IsInMixin[int], OrdinalMixin[int], Column):
def __init__(
self,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
min: int | None = None,
min_exclusive: int | None = None,
Expand All @@ -37,8 +37,10 @@ def __init__(
"""
Args:
nullable: Whether this column may contain null values.
If ``None``, the default behavior is as follows:
- If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
- If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
primary_key: Whether this column is part of the primary key of the schema.
If ``True``, ``nullable`` is automatically set to ``False``.
min: The minimum value for integers in this column (inclusive).
min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
is specified and vice versa.
Expand Down
2 changes: 1 addition & 1 deletion dataframely/columns/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(
self,
inner: Column,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
check: Callable[[pl.Expr], pl.Expr] | None = None,
alias: str | None = None,
Expand Down
2 changes: 1 addition & 1 deletion dataframely/columns/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class String(Column):
def __init__(
self,
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
min_length: int | None = None,
max_length: int | None = None,
Expand Down
2 changes: 1 addition & 1 deletion dataframely/columns/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(
self,
inner: dict[str, Column],
*,
nullable: bool = True,
nullable: bool | None = None,
primary_key: bool = False,
check: Callable[[pl.Expr], pl.Expr] | None = None,
alias: str | None = None,
Expand Down
8 changes: 7 additions & 1 deletion tests/schema/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


class MySchema(dy.Schema):
a = dy.Int64(primary_key=True)
a = dy.Int64(primary_key=True, nullable=True)
b = dy.String(nullable=False, max_length=5)
c = dy.String()

Expand Down Expand Up @@ -116,3 +116,9 @@ def test_success_multi_row_strip_cast(
)
assert_frame_equal(actual, expected)
assert MySchema.is_valid(df, cast=True)


@pytest.mark.parametrize("df_type", [pl.DataFrame, pl.LazyFrame])
def test_nullable_primary_key(
    df_type: type[pl.DataFrame] | type[pl.LazyFrame],
) -> None:
    """Validate a frame whose primary-key column ``a`` contains a null.

    The frame is built with ``df_type`` (eager or lazy) and handed to
    ``MySchema.validate``; the test passes if that call does not raise.
    """
    columns = {"a": [None, 2, 3], "b": ["x", "y", "z"], "c": ["1", None, None]}
    frame = df_type(columns)
    MySchema.validate(frame)
Loading