Skip to content

fastflowtransform.config.seeds

SeedTargetConfig

Bases: BaseModel

Configuration for a single seed target entry in seeds/schema.yml.

Example

targets:
  raw/users:
    schema: raw
    table: seed_users
    schema_by_engine:
      duckdb: main
      postgres: raw

Source code in src/fastflowtransform/config/seeds.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class SeedTargetConfig(BaseModel):
    """
    One entry under ``targets`` in seeds/schema.yml.

    Unknown keys are rejected (``extra="forbid"``). Every field is optional.

    Example:
      targets:
        raw/users:
          schema: raw
          table: seed_users
          schema_by_engine:
            duckdb: main
            postgres: raw
    """

    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Written as ``schema`` in YAML; ``populate_by_name`` also accepts ``schema_``.
    # The trailing underscore presumably avoids clashing with BaseModel's own
    # ``schema`` attribute -- confirm before renaming.
    schema_: str | None = Field(default=None, alias="schema")
    table: str | None = None
    # Per-engine schema names, keyed by engine.
    schema_by_engine: dict[EngineType, str] = Field(default_factory=dict)

    @field_validator("schema_")
    @classmethod
    def _strip_schema(cls, value: str | None) -> str | None:
        """Trim surrounding whitespace; a blank string becomes ``None``."""
        if not isinstance(value, str):
            return value
        return value.strip() or None

    @field_validator("schema_by_engine")
    @classmethod
    def _strip_schema_by_engine(cls, value: dict[str, str]) -> dict[str, str]:
        """Trim every schema name and drop entries that are blank or not strings."""
        trimmed = (
            (engine, name.strip())
            for engine, name in (value or {}).items()
            if isinstance(name, str)
        )
        return {engine: name for engine, name in trimmed if name}

    @model_validator(mode="after")
    def _allow_empty_schema(self) -> SeedTargetConfig:
        """
        Accept the target as-is (intentional no-op).

        A target may omit both ``schema`` and ``schema_by_engine`` so that the
        executor/default schema can still be used. To require at least one of
        them, raise here when both ``self.schema_`` and ``self.schema_by_engine``
        are empty, e.g. with the message
        "Either 'schema' or 'schema_by_engine' must be set for a seed target".
        """
        return self

SeedColumnConfig

Bases: BaseModel

Column typing metadata for seeds/schema.yml.

Source code in src/fastflowtransform/config/seeds.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
class SeedColumnConfig(BaseModel):
    """
    Column typing metadata for seeds/schema.yml.

    Holds a type name (``type`` in YAML) plus optional per-engine overrides.
    Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Written as ``type`` in YAML; ``populate_by_name`` also accepts ``type_``.
    type_: str | None = Field(default=None, alias="type")
    # Per-engine type names, keyed by engine.
    engines: dict[EngineType, str] = Field(default_factory=dict)

    @field_validator("type_")
    @classmethod
    def _strip_type(cls, value: str | None) -> str | None:
        """Trim surrounding whitespace; a blank string becomes ``None``."""
        if not isinstance(value, str):
            return value
        return value.strip() or None

    @field_validator("engines")
    @classmethod
    def _strip_engines(cls, value: dict[str, str]) -> dict[str, str]:
        """Trim every type name and drop entries that are blank or not strings."""
        trimmed = (
            (engine, type_name.strip())
            for engine, type_name in (value or {}).items()
            if isinstance(type_name, str)
        )
        return {engine: type_name for engine, type_name in trimmed if type_name}

SeedsSchemaConfig

Bases: BaseModel

Top-level configuration for seeds/schema.yml.

Structure

targets:
  <seed-id>:
    schema: ...
    table: ...
    schema_by_engine: { duckdb: ..., postgres: ... }

dtypes:
  <table-key>:
    column_a: string
    column_b: int64

columns:
  <table-key>:
    <column-name>:
      type: string|integer|timestamp|...
      engines:
        postgres: timestamptz

Source code in src/fastflowtransform/config/seeds.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
class SeedsSchemaConfig(BaseModel):
    """
    Top-level configuration for seeds/schema.yml.

    Unknown top-level keys are rejected (``extra="forbid"``).

    Structure:
      targets:
        <seed-id>:
          schema: ...
          table: ...
          schema_by_engine: { duckdb: ..., postgres: ... }

      dtypes:
        <table-key>:
          column_a: string
          column_b: int64

      columns:
        <table-key>:
          <column-name>:
            type: string|integer|timestamp|...
            engines:
              postgres: timestamptz

    A column entry may also be a bare string, which is shorthand for
    ``{type: <string>}``.
    """

    model_config = ConfigDict(extra="forbid")

    targets: dict[str, SeedTargetConfig] = Field(default_factory=dict)
    dtypes: dict[str, dict[str, str]] = Field(default_factory=dict)
    # Declared with ``Any`` values so the string shorthand reaches
    # ``_normalize_columns``; after validation every value is a SeedColumnConfig.
    columns: dict[str, dict[str, Any]] = Field(default_factory=dict)

    @field_validator("dtypes")
    @classmethod
    def _normalize_dtypes(cls, value: dict[str, dict[str, Any]]) -> dict[str, dict[str, str]]:
        """
        Trim column names and dtype strings.

        Blank or non-string entries are dropped, and so are tables left with
        no columns. Table keys are kept verbatim.
        """
        out: dict[str, dict[str, str]] = {}
        for table_key, cols in (value or {}).items():
            if not isinstance(cols, dict):
                continue
            clean_cols: dict[str, str] = {}
            for col, dtype in cols.items():
                if not isinstance(col, str) or not isinstance(dtype, str):
                    continue
                col_clean = col.strip()
                dtype_clean = dtype.strip()
                if col_clean and dtype_clean:
                    clean_cols[col_clean] = dtype_clean
            if clean_cols:
                out[table_key] = clean_cols
        return out

    @field_validator("columns")
    @classmethod
    def _normalize_columns(
        cls, value: dict[str, dict[str, Any]]
    ) -> dict[str, dict[str, SeedColumnConfig]]:
        """
        Convert every column payload into a ``SeedColumnConfig``.

        Accepted payloads:
          - a ``SeedColumnConfig`` instance (kept as-is)
          - a mapping (validated as ``SeedColumnConfig``)
          - a string (shorthand for ``type``)

        Any other payload, a blank or non-string column name, or a table left
        with no columns is dropped. Table keys are kept verbatim.
        """
        out: dict[str, dict[str, SeedColumnConfig]] = {}
        for table_key, cols in (value or {}).items():
            if not isinstance(cols, dict):
                continue
            clean_cols: dict[str, SeedColumnConfig] = {}
            for col_name, payload in cols.items():
                if not isinstance(col_name, str):
                    continue
                col_clean = col_name.strip()
                if not col_clean:
                    continue
                if isinstance(payload, SeedColumnConfig):
                    # Already-built configs (e.g. reusing another instance's
                    # ``columns``) must survive instead of being discarded.
                    clean_cols[col_clean] = payload
                elif isinstance(payload, dict):
                    clean_cols[col_clean] = SeedColumnConfig.model_validate(payload)
                elif isinstance(payload, str):
                    clean_cols[col_clean] = SeedColumnConfig(type=payload)
            if clean_cols:
                out[table_key] = clean_cols
        return out

load_seeds_schema

load_seeds_schema(project_dir, seeds_dir=None)

Load and validate seeds/schema.yml for a given project.

Returns:

Type: SeedsSchemaConfig | None
  • A SeedsSchemaConfig instance when the file exists and is valid
  • None when no file is present

Raises:

Type: ValueError
Description: Raised when YAML is present but does not match the expected schema.

Source code in src/fastflowtransform/config/seeds.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def load_seeds_schema(project_dir: Path, seeds_dir: Path | None = None) -> SeedsSchemaConfig | None:
    """
    Read ``schema.yml`` from the seeds directory and validate it.

    The seeds directory is ``seeds_dir`` when given, otherwise
    ``project_dir / "seeds"``. An empty YAML document is treated as ``{}``.

    Returns:
      - SeedsSchemaConfig instance when the file exists and is valid
      - None when no file is present

    Raises:
      ValueError: when YAML is present but does not match the expected schema.

    Errors from reading the file or from ``yaml.safe_load`` are not wrapped
    and propagate unchanged.
    """
    base_dir = seeds_dir if seeds_dir else project_dir / "seeds"
    schema_file = base_dir / "schema.yml"
    if not schema_file.exists():
        return None

    text = schema_file.read_text(encoding="utf-8")
    payload = yaml.safe_load(text) or {}
    try:
        config = SeedsSchemaConfig.model_validate(payload)
    except ValidationError as exc:
        raise ValueError(f"Failed to parse seeds/schema.yml: {exc}") from exc
    return config