Quantco · gab23r · Apr 24, 2025 · Apr 24, 2025
@@ -28,7 +28,7 @@ class Column(ABC):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         check: Callable[[pl.Expr], pl.Expr] | None = None,
         alias: str | None = None,
@@ -37,8 +37,10 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             check: A custom check to run for this column. Must return a non-aggregated
                 boolean expression.
             alias: An overwrite for this column's name which allows for using a column
@@ -48,7 +50,7 @@ def __init__(
                 internally sets the alias to the column's name in the parent schema.
             metadata: A dictionary of metadata to attach to the column.
         """
-        self.nullable = nullable and not primary_key
+        self.nullable = nullable if nullable is not None else not primary_key
         self.primary_key = primary_key
         self.check = check
         self.alias = alias

@@ -32,7 +32,7 @@ class Date(OrdinalMixin[dt.date], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: dt.date | None = None,
         min_exclusive: dt.date | None = None,
@@ -46,8 +46,10 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum date for dates in this column (inclusive).
             min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
                 is specified and vice versa.
@@ -142,7 +144,7 @@ class Time(OrdinalMixin[dt.time], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: dt.time | None = None,
         min_exclusive: dt.time | None = None,
@@ -156,8 +158,10 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum time for times in this column (inclusive).
             min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
                 is specified and vice versa.
@@ -258,7 +262,7 @@ class Datetime(OrdinalMixin[dt.datetime], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: dt.datetime | None = None,
         min_exclusive: dt.datetime | None = None,
@@ -272,8 +276,10 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum datetime for datetimes in this column (inclusive).
             min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
                 is specified and vice versa.
@@ -364,7 +370,7 @@ class Duration(OrdinalMixin[dt.timedelta], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: dt.timedelta | None = None,
         min_exclusive: dt.timedelta | None = None,
@@ -378,8 +384,10 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum duration for durations in this column (inclusive).
             min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
                 is specified and vice versa.

@@ -27,7 +27,7 @@ def __init__(
         precision: int | None = None,
         scale: int = 0,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: decimal.Decimal | None = None,
         min_exclusive: decimal.Decimal | None = None,
@@ -42,8 +42,10 @@ def __init__(
             precision: Maximum number of digits in each number.
             scale: Number of digits to the right of the decimal point in each number.
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum value for decimals in this column (inclusive).
             min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
                 is specified and vice versa.

@@ -22,7 +22,7 @@ def __init__(
         self,
         categories: Sequence[str],
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         check: Callable[[pl.Expr], pl.Expr] | None = None,
         alias: str | None = None,
@@ -32,8 +32,10 @@ def __init__(
         Args:
             categories: The list of valid categories for the enum.
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             check: A custom check to run for this column. Must return a non-aggregated
                 boolean expression.
             alias: An overwrite for this column's name which allows for using a column

@@ -26,7 +26,7 @@ class _BaseFloat(OrdinalMixin[float], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         allow_inf_nan: bool = False,
         min: float | None = None,
@@ -40,8 +40,10 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             allow_inf_nan: Whether this column may contain NaN and infinity values.
             min: The minimum value for floats in this column (inclusive).
             min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``

@@ -23,7 +23,7 @@ class _BaseInteger(IsInMixin[int], OrdinalMixin[int], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: int | None = None,
         min_exclusive: int | None = None,
@@ -37,8 +37,10 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                If ``None``, the default behavior is as follows:
+                - If ``primary_key`` is ``True``, ``nullable`` defaults to ``False``.
+                - If ``primary_key`` is ``False``, ``nullable`` defaults to ``True``.
             primary_key: Whether this column is part of the primary key of the schema.
-                If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum value for integers in this column (inclusive).
             min_exclusive: Like ``min`` but exclusive. May not be specified if ``min``
                 is specified and vice versa.

@@ -24,7 +24,7 @@ def __init__(
         self,
         inner: Column,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         check: Callable[[pl.Expr], pl.Expr] | None = None,
         alias: str | None = None,

@@ -23,7 +23,7 @@ class String(Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min_length: int | None = None,
         max_length: int | None = None,

@@ -22,7 +22,7 @@ def __init__(
         self,
         inner: dict[str, Column],
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         check: Callable[[pl.Expr], pl.Expr] | None = None,
         alias: str | None = None,

@@ -10,7 +10,7 @@
 
 
 class MySchema(dy.Schema):
-    a = dy.Int64(primary_key=True)
+    a = dy.Int64(primary_key=True, nullable=True)
     b = dy.String(nullable=False, max_length=5)
     c = dy.String()
 
@@ -116,3 +116,9 @@ def test_success_multi_row_strip_cast(
     )
     assert_frame_equal(actual, expected)
     assert MySchema.is_valid(df, cast=True)
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pl.LazyFrame])
+def test_nullable_primary_key(df_type: type[pl.DataFrame] | type[pl.LazyFrame]) -> None:
+    df = df_type({"a": [None, 2, 3], "b": ["x", "y", "z"], "c": ["1", None, None]})
+    MySchema.validate(df)