Skip to content

fastflowtransform.config.contracts

PhysicalTypeConfig

Bases: BaseModel

Engine-specific physical type configuration for a column.

All fields are optional; you can set:
  - default: applies to all engines if no engine-specific override is set
  - duckdb, postgres, bigquery, snowflake_snowpark, databricks_spark: engine-specific physical types (e.g. "integer", "NUMERIC", "TIMESTAMP")

Example YAML

physical: "integer"

physical:
  default: numeric
  postgres: numeric
  bigquery: NUMERIC

Source code in src/fastflowtransform/config/contracts.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
class PhysicalTypeConfig(BaseModel):
    """
    Engine-specific physical type configuration for a column.

    All fields are optional; you can set:
      - default: applies to all engines if no engine-specific override is set
      - duckdb, postgres, bigquery, snowflake_snowpark, databricks_spark:
        engine-specific physical types (e.g. "integer", "NUMERIC", "TIMESTAMP")

    This model only stores the configured strings; it contains no lookup
    logic of its own.

    Example YAML:
      physical: "integer"

      physical:
        default: numeric
        postgres: numeric
        bigquery: NUMERIC
    """

    # Unknown keys (e.g. a misspelled engine name) are rejected instead of
    # being silently ignored.
    model_config = ConfigDict(extra="forbid")

    # Engine-agnostic fallback type.
    default: str | None = None
    # Per-engine physical types; each one is None when not configured.
    duckdb: str | None = None
    postgres: str | None = None
    bigquery: str | None = None
    snowflake_snowpark: str | None = None
    databricks_spark: str | None = None

ColumnContractModel

Bases: BaseModel

Column-level contract definition.

Example YAML fragment:

columns:
  id:
    type: integer
    nullable: false
  status:
    type: string
    enum: ["active", "inactive"]
  amount:
    type: double
    nullable: false
    min: 0
    max: 10000
  email:
    type: string
    regex: "^[^@]+@[^@]+$"
Source code in src/fastflowtransform/config/contracts.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
class ColumnContractModel(BaseModel):
    """
    Column-level contract definition.

    Every field is optional, and unknown keys are rejected
    (``extra="forbid"``).

    Example YAML fragment:

        columns:
          id:
            type: integer
            nullable: false
          status:
            type: string
            enum: ["active", "inactive"]
          amount:
            type: double
            nullable: false
            min: 0
            max: 10000
          email:
            type: string
            regex: "^[^@]+@[^@]+$"

    Input shorthands normalised before validation:
      - enum: a scalar value is wrapped into a one-element list
      - physical: a plain string ``"x"`` becomes ``{"default": "x"}``
    """

    model_config = ConfigDict(extra="forbid")

    # Optional semantic / physical type hint ("integer", "string", "timestamp", ...)
    type: str | None = None

    # Engine-specific physical DB types; see PhysicalTypeConfig.
    physical: PhysicalTypeConfig | None = None

    # Nullability: nullable=False → not_null check
    nullable: bool | None = None

    # Uniqueness: unique=True → unique test
    unique: bool | None = None

    # Enumerated allowed values (accepted_values test)
    enum: list[Any] | None = None

    # Regex constraint; currently used via a generic regex_match test
    regex: str | None = None

    # Numeric range (inclusive) for numeric-like columns
    min: float | int | None = None
    max: float | int | None = None

    # Optional free-form description (handy for docs later)
    description: str | None = None

    @field_validator("enum", mode="before")
    @classmethod
    def _normalize_enum(cls, v: Any) -> list[Any] | None:
        """
        Normalise the raw ``enum`` value to a list (or None).

        Allow:
          enum: "A"        -> ["A"]
          enum: [1, 2, 3]  -> [1, 2, 3]

        Lists and tuples are copied into a new list; any other non-None
        value is wrapped as a single-element list.
        """
        if v is None:
            return None
        if isinstance(v, (list, tuple)):
            return list(v)
        return [v]

    @field_validator("physical", mode="before")
    @classmethod
    def _coerce_physical(cls, v: Any) -> Any:
        """
        Accept either:
          physical: "integer"
          physical:
            default: numeric
            postgres: numeric
            bigquery: NUMERIC
        and normalize to a PhysicalTypeConfig-compatible dict.

        None and existing PhysicalTypeConfig instances are returned as-is.

        Raises:
            ValueError: if the value is neither a string, a mapping, a
                PhysicalTypeConfig nor None. Pydantic reports it as a
                validation error on the ``physical`` field.
        """
        if v is None:
            return None
        if isinstance(v, PhysicalTypeConfig):
            return v
        if isinstance(v, str):
            # Shorthand: same type for all engines → default
            return {"default": v}
        if isinstance(v, dict):
            # Let Pydantic validate keys; we just pass through.
            return v
        # Must be ValueError (not TypeError): pydantic v2 only turns
        # ValueError/AssertionError raised in a validator into a
        # ValidationError; a TypeError would escape model validation as a
        # bare exception without any field location.
        raise ValueError(
            "physical must be either a string or a mapping of engine keys to types "
            "(e.g. {default: numeric, postgres: numeric})"
        )

TableSchemaEnforcementModel

Bases: BaseModel

Per-table runtime schema enforcement configuration.

Example in *.contracts.yml:

enforce_schema:
  mode: cast          # off | verify | cast
  allow_extra_columns: false
Source code in src/fastflowtransform/config/contracts.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
class TableSchemaEnforcementModel(BaseModel):
    """
    Runtime schema enforcement settings attached to a single table.

    Appears under the ``enforce_schema`` key of a *.contracts.yml file:

        enforce_schema:
          mode: cast          # off | verify | cast
          allow_extra_columns: false

    When omitted, ``mode`` is "off" and ``allow_extra_columns`` is True.
    Unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    mode: SchemaEnforcementMode = "off"
    allow_extra_columns: bool = True

    @field_validator("mode", mode="before")
    @classmethod
    def _coerce_mode(cls, v: Any) -> Any:
        """Normalise the raw ``mode`` value before it is validated."""
        # Strings are trimmed and lower-cased so " Cast " is read as "cast".
        if isinstance(v, str):
            return v.strip().lower()
        # A bare (unquoted) `off` in YAML arrives as the boolean False.
        # Anything else is handed to the field validation untouched.
        return "off" if v is False else v

ContractsFileModel

Bases: BaseModel

One contracts file.

Convention
  • One file describes contracts for exactly one table/relation.
  • The table name is what will be used in DQ tests (SELECT ... FROM <table>).

    Example *.contracts.yml:

    version: 1
    table: users_enriched
    columns:
      id:
        type: integer
        nullable: false
      status:
        type: string
        enum: ["active", "inactive"]
      email:
        type: string
        nullable: false
        regex: "^[^@]+@[^@]+$"
    
    Source code in src/fastflowtransform/config/contracts.py
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
    182
    183
    184
    185
    186
    187
    188
    189
    190
    191
    192
    193
    194
    195
    196
    197
    class ContractsFileModel(BaseModel):
        """
        One contracts file.
    
        Convention:
          - One file describes contracts for exactly one table/relation.
          - The table name is what will be used in DQ tests (SELECT ... FROM <table>).
    
        ``table`` is the only required key; unknown top-level keys are
        rejected (``extra="forbid"``).
    
        Example `*.contracts.yml`:
    
            version: 1
            table: users_enriched
            columns:
              id:
                type: integer
                nullable: false
              status:
                type: string
                enum: ["active", "inactive"]
              email:
                type: string
                nullable: false
                regex: "^[^@]+@[^@]+$"
        """
    
        model_config = ConfigDict(extra="forbid")
    
        # Version number of the file format; 1 when the key is omitted.
        version: int = 1
        # Required: there is no default, so a file without `table` fails validation.
        table: str = Field(..., description="Logical/physical table name the contract applies to")
        # Column name → contract; an empty mapping when the key is omitted.
        columns: dict[str, ColumnContractModel] = Field(default_factory=dict)
    
        # None when the file has no `enforce_schema` block.
        enforce_schema: TableSchemaEnforcementModel | None = Field(
            default=None,
            description="Optional runtime schema enforcement config for this table",
        )
    

ColumnMatchModel

Bases: BaseModel

Column match expression for project-level defaults.

Currently supports
  • name: regex on column name (required)
  • table: optional regex on table name (future-proof; optional)
Source code in src/fastflowtransform/config/contracts.py
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class ColumnMatchModel(BaseModel):
    """
    Selector used by project-level default rules to pick columns.

    Fields:
      - name: regex applied to the column name (required)
      - table: regex applied to the table name (optional)

    Both patterns have surrounding whitespace removed once the model has
    been validated. Unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    name: str = Field(..., description="Regex to match column name")
    table: str | None = Field(
        default=None, description="Optional regex to restrict to specific tables"
    )

    @model_validator(mode="after")
    def _strip(self) -> ColumnMatchModel:
        """Trim leading/trailing whitespace from the configured patterns."""
        for attr in ("name", "table"):
            pattern = getattr(self, attr)
            # `table` may be None; `name` is always a string at this point.
            if pattern is not None:
                # object.__setattr__ writes the attribute directly, without
                # going through the model's own __setattr__.
                object.__setattr__(self, attr, pattern.strip())
        return self

ColumnDefaultsRuleModel

Bases: BaseModel

One rule under defaults.columns in contracts.yml.

Example:

defaults:
  columns:
    - match:
        name: ".*_id$"
      type: integer
      nullable: false
    - match:
        name: "created_at"
      type: timestamp
      nullable: false
Source code in src/fastflowtransform/config/contracts.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
class ColumnDefaultsRuleModel(BaseModel):
    """
    One rule under defaults.columns in contracts.yml.

    A rule is a ``match`` selector plus an optional payload with the same
    fields as ColumnContractModel. Unknown keys are rejected
    (``extra="forbid"``).

    Example:

        defaults:
          columns:
            - match:
                name: ".*_id$"
              type: integer
              nullable: false
            - match:
                name: "created_at"
              type: timestamp
              nullable: false

    Input shorthands normalised before validation:
      - enum: a scalar value is wrapped into a one-element list
      - physical: a plain string ``"x"`` becomes ``{"default": "x"}``
    """

    model_config = ConfigDict(extra="forbid")

    # Required selector deciding which columns this rule applies to.
    match: ColumnMatchModel
    # Payload is the same shape as ColumnContractModel but optional:
    type: str | None = None
    physical: PhysicalTypeConfig | None = None
    nullable: bool | None = None
    unique: bool | None = None
    enum: list[Any] | None = None
    regex: str | None = None
    # Declared as `float | int`, exactly like ColumnContractModel.min/max, so
    # a bound is validated the same way whether it comes from a default rule
    # or from a per-table contract.
    min: float | int | None = None
    max: float | int | None = None
    description: str | None = None

    @field_validator("enum", mode="before")
    @classmethod
    def _normalize_enum(cls, v: Any) -> list[Any] | None:
        """
        Normalise the raw ``enum`` value to a list (or None).

        Lists and tuples are copied into a new list; any other non-None
        value is wrapped as a single-element list.
        """
        if v is None:
            return None
        if isinstance(v, (list, tuple)):
            return list(v)
        return [v]

    @field_validator("physical", mode="before")
    @classmethod
    def _coerce_physical(cls, v: Any) -> Any:
        """
        Normalise the raw ``physical`` value to PhysicalTypeConfig input.

        A string is shorthand for ``{"default": <string>}``; dicts, None and
        PhysicalTypeConfig instances are passed through unchanged.

        Raises:
            ValueError: for any other input type. Pydantic reports it as a
                validation error on the ``physical`` field.
        """
        if v is None:
            return None
        if isinstance(v, PhysicalTypeConfig):
            return v
        if isinstance(v, str):
            return {"default": v}
        if isinstance(v, dict):
            return v
        # Must be ValueError (not TypeError): pydantic v2 only turns
        # ValueError/AssertionError raised in a validator into a
        # ValidationError; a TypeError would escape model validation as a
        # bare exception without any field location.
        raise ValueError(
            "defaults.columns[*].physical must be either a string or a mapping of engine "
            "keys to types (e.g. {default: numeric, postgres: numeric})"
        )

ContractsDefaultsModel

Bases: BaseModel

Root defaults block for project-level contracts.yml.

Example:

version: 1

defaults:
  models:
    - match:
        name: "staging.*"
      materialized: table

  columns:
    - match:
        name: ".*_id$"
      type: integer
      nullable: false
    - match:
        name: "created_at"
      type: timestamp
      nullable: false
Source code in src/fastflowtransform/config/contracts.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
class ContractsDefaultsModel(BaseModel):
    """
    Root defaults block for project-level contracts.yml.

    Only ``columns`` is declared. Because of ``extra="forbid"``, any other
    key under ``defaults`` (for example ``models``) fails validation.

    Example (this model validates the content of the ``defaults`` key):

        version: 1

        defaults:
          columns:
            - match:
                name: ".*_id$"
              type: integer
              nullable: false
            - match:
                name: "created_at"
              type: timestamp
              nullable: false
    """

    model_config = ConfigDict(extra="forbid")

    # Future global defaults (e.g. a default severity for contract tests) could live here.
    # List of column default rules; an empty list when the key is omitted.
    columns: list[ColumnDefaultsRuleModel] = Field(default_factory=list)

TableSchemaEnforcementOverrideModel

Bases: BaseModel

Per-table override in project-level contracts.yml

Example:

enforcement:
  tables:
    customers:
      mode: cast
      allow_extra_columns: false
Source code in src/fastflowtransform/config/contracts.py
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
class TableSchemaEnforcementOverrideModel(BaseModel):
    """
    Per-table override in project-level contracts.yml.

    Both fields are optional and stay None when the key is not given.
    Unknown keys are rejected (``extra="forbid"``).

    Example:

        enforcement:
          tables:
            customers:
              mode: cast
              allow_extra_columns: false
    """

    model_config = ConfigDict(extra="forbid")

    mode: SchemaEnforcementMode | None = None
    allow_extra_columns: bool | None = None

    @field_validator("mode", mode="before")
    @classmethod
    def _coerce_mode(cls, v: Any) -> Any:
        """
        Normalise the raw ``mode`` value before it is validated.

        Applies the same normalisation as
        TableSchemaEnforcementModel._coerce_mode and
        ProjectSchemaEnforcementModel._coerce_default_mode, so an override
        accepts the same spellings as the settings it overrides:
          - a bare (unquoted) `off` in YAML arrives as boolean False and is
            mapped to "off"
          - strings are trimmed and lower-cased
        Every other value, including None, is returned unchanged.
        """
        if v is False:
            return "off"
        if isinstance(v, str):
            return v.strip().lower()
        return v

ProjectSchemaEnforcementModel

Bases: BaseModel

Project-level schema enforcement defaults (contracts.yml).

Example:

version: 1

enforcement:
  default_mode: verify          # off | verify | cast
  allow_extra_columns: true
  tables:
    customers:
      mode: cast
      allow_extra_columns: false
Source code in src/fastflowtransform/config/contracts.py
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
class ProjectSchemaEnforcementModel(BaseModel):
    """
    Schema enforcement settings declared once for the whole project
    (the ``enforcement`` block of contracts.yml).

    Example:

        version: 1

        enforcement:
          default_mode: verify          # off | verify | cast
          allow_extra_columns: true
          tables:
            customers:
              mode: cast
              allow_extra_columns: false

    When omitted, ``default_mode`` is "off", ``allow_extra_columns`` is True
    and ``tables`` is an empty mapping. Unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    default_mode: SchemaEnforcementMode = "off"
    allow_extra_columns: bool = True
    # Table name → per-table override.
    tables: dict[str, TableSchemaEnforcementOverrideModel] = Field(default_factory=dict)

    @field_validator("default_mode", mode="before")
    @classmethod
    def _coerce_default_mode(cls, v: Any) -> Any:
        """Normalise the raw ``default_mode`` value before it is validated."""
        # Strings are trimmed and lower-cased.
        if isinstance(v, str):
            return v.strip().lower()
        # A bare (unquoted) `off` in YAML arrives as the boolean False.
        # True is deliberately not mapped; it is passed on unchanged like
        # every other non-string value.
        return "off" if v is False else v

ProjectContractsModel

Bases: BaseModel

Top-level model for project-level contracts.yml.

Only defines defaults, no table-specific contracts (those live in per-table *.contracts.yml files).

Source code in src/fastflowtransform/config/contracts.py
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
class ProjectContractsModel(BaseModel):
    """
    Top-level model for project-level contracts.yml.

    Only defines defaults, no table-specific contracts (those live in
    per-table *.contracts.yml files).

    Accepted top-level keys are ``version``, ``defaults`` and
    ``enforcement``; anything else is rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Version number of the file format; 1 when the key is omitted.
    version: int = 1
    # A ContractsDefaultsModel built with its own defaults when the key is omitted.
    defaults: ContractsDefaultsModel = Field(default_factory=ContractsDefaultsModel)

    # None when the file has no `enforcement` block.
    enforcement: ProjectSchemaEnforcementModel | None = Field(
        default=None,
        description="Runtime schema enforcement defaults and per-table overrides",
    )

parse_contracts_file

parse_contracts_file(path)

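Load and validate a single *.contracts.yml file. Any failure (unreadable file, malformed YAML, schema validation error) is re-raised as ContractsConfigError carrying the file path and a hint; the original exception is chained as its cause.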

Source code in src/fastflowtransform/config/contracts.py
393
394
395
396
397
398
399
400
401
402
403
404
405
def parse_contracts_file(path: Path) -> ContractsFileModel:
    """
    Read one per-table *.contracts.yml file and validate it.

    Args:
        path: Location of the contracts file; it is read as UTF-8.

    Returns:
        The validated ContractsFileModel.

    Raises:
        ContractsConfigError: for any failure while reading, parsing or
            validating the file. The original exception is chained as the
            cause, and the error carries the file path and a hint.
    """
    try:
        text = path.read_text(encoding="utf-8")
        # NOTE(review): yaml.load is only safe on untrusted files if
        # NoDupLoader is built on a safe loader; confirm where it is defined.
        document = yaml.load(text, Loader=NoDupLoader)
        # A falsy document (e.g. an empty file) is validated as an empty mapping.
        return ContractsFileModel.model_validate(document or {})
    except Exception as exc:
        raise ContractsConfigError(
            f"Failed to parse contracts file: {exc}",
            path=str(path),
            hint="Check the contracts YAML for duplicate keys or invalid structure.",
        ) from exc

parse_project_contracts_file

parse_project_contracts_file(path)

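Load and validate the project-level contracts.yml file. Returns a ProjectContractsModel; any failure (unreadable file, malformed YAML, schema validation error) is re-raised as ContractsConfigError carrying the file path and a hint.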

Source code in src/fastflowtransform/config/contracts.py
408
409
410
411
412
413
414
415
416
417
418
419
420
def parse_project_contracts_file(path: Path) -> ProjectContractsModel:
    """
    Read the project-level contracts.yml file and validate it.

    Args:
        path: Location of the project contracts file; it is read as UTF-8.

    Returns:
        The validated ProjectContractsModel.

    Raises:
        ContractsConfigError: for any failure while reading, parsing or
            validating the file. The original exception is chained as the
            cause, and the error carries the file path and a hint.
    """
    try:
        text = path.read_text(encoding="utf-8")
        # NOTE(review): yaml.load is only safe on untrusted files if
        # NoDupLoader is built on a safe loader; confirm where it is defined.
        document = yaml.load(text, Loader=NoDupLoader)
        # A falsy document (e.g. an empty file) is validated as an empty mapping.
        return ProjectContractsModel.model_validate(document or {})
    except Exception as exc:
        raise ContractsConfigError(
            f"Failed to parse project contracts file: {exc}",
            path=str(path),
            hint="Check the project-level contracts.yml for duplicate keys or invalid structure.",
        ) from exc