fastflowtransform.executors.snowflake_snowpark_exec¶

SnowflakeSnowparkExecutor ¶

Bases: BaseExecutor[DataFrame]

Source code in src/fastflowtransform/executors/snowflake_snowpark_exec.py

class SnowflakeSnowparkExecutor(BaseExecutor[SNDF]):
    """Snowflake executor operating on Snowpark DataFrames (no pandas).

    Relations are always addressed as ``"DB"."SCHEMA"."NAME"`` using the
    database and schema taken from the connection config, with every part
    double-quoted (see ``_qualified``).
    """

    ENGINE_NAME = "snowflake_snowpark"

    def __init__(self, cfg: dict):
        """Open a Snowpark session from ``cfg``.

        Args:
            cfg: Connection settings, e.g.
                {account, user, password, warehouse, database, schema, role?}.
                ``database`` and ``schema`` are required keys.

        Raises:
            KeyError: If ``database`` or ``schema`` is missing from ``cfg``.
        """
        self.session = Session.builder.configs(cfg).create()
        self.database = cfg["database"]
        self.schema = cfg["schema"]
        # Provide a tiny testing shim so tests can call executor.con.execute("SQL")
        self.con = _SFCursorShim(self.session)

    # ---------- Helpers ----------
    def _q(self, s: str) -> str:
        """Quote ``s`` as a SQL *identifier* (double quotes, ``"`` doubled)."""
        return '"' + s.replace('"', '""') + '"'

    def _lit(self, s: str) -> str:
        """Quote ``s`` as a SQL *string literal* (single quotes).

        Backslashes are escaped first because Snowflake interprets backslash
        escape sequences inside single-quoted constants.
        """
        return "'" + s.replace("\\", "\\\\").replace("'", "''") + "'"

    def _qualified(self, rel: str) -> str:
        """Return the fully qualified, quoted name ``"DB"."SCHEMA"."TABLE"``."""
        return f"{self._q(self.database)}.{self._q(self.schema)}.{self._q(rel)}"

    # ---------- Frame-Hooks ----------
    def _read_relation(self, relation: str, node: Node, deps: Iterable[str]) -> SNDF:
        """Return a lazy Snowpark DataFrame over ``relation``.

        ``node`` and ``deps`` are accepted for the hook signature but unused.
        """
        return self.session.table(self._qualified(relation))

    def _materialize_relation(self, relation: str, df: SNDF, node: Node) -> None:
        """Write ``df`` to ``relation``, overwriting any existing table.

        Raises:
            TypeError: If ``df`` is not a Snowpark DataFrame.
        """
        if not self._is_frame(df):
            raise TypeError("Snowpark model must return a Snowpark DataFrame")
        df.write.save_as_table(self._qualified(relation), mode="overwrite")

    def _create_view_over_table(self, view_name: str, backing_table: str, node: Node) -> None:
        """Create or replace ``view_name`` as ``SELECT *`` over ``backing_table``."""
        qv = self._qualified(view_name)
        qb = self._qualified(backing_table)
        self.session.sql(f"CREATE OR REPLACE VIEW {qv} AS SELECT * FROM {qb}").collect()

    def _validate_required(
        self, node_name: str, inputs: Any, requires: dict[str, set[str]]
    ) -> None:
        """Check that the input frame(s) expose every required column.

        Args:
            node_name: Model name, used only in the error message.
            inputs: Either a single Snowpark DataFrame or a mapping of
                relation name -> DataFrame.
            requires: Mapping of relation name -> required column names.
                For a single-frame input only the first entry is checked.

        Raises:
            ValueError: If any required column or dependency key is missing.
        """
        if not requires:
            return

        def cols(df: SNDF) -> set[str]:
            # Snowpark: schema names
            return set(df.schema.names)

        errors: list[str] = []
        if isinstance(inputs, SNDF):
            # Single dependency: there is no key to match, use the first entry.
            need = next(iter(requires.values()), set())
            have = cols(inputs)
            missing = need - have
            if missing:
                errors.append(f"- missing columns: {sorted(missing)} | have={sorted(have)}")
        else:
            # Multiple dependencies keyed by relation name.
            for rel, need in requires.items():
                if rel not in inputs:
                    errors.append(f"- missing dependency key '{rel}'")
                    continue
                have = cols(inputs[rel])
                missing = need - have
                if missing:
                    errors.append(f"- [{rel}] missing: {sorted(missing)} | have={sorted(have)}")

        if errors:
            raise ValueError(
                "Required columns check failed for Snowpark model "
                f"'{node_name}'.\n" + "\n".join(errors)
            )

    def _columns_of(self, frame: SNDF) -> list[str]:
        """Return the column names of ``frame`` in schema order."""
        return list(frame.schema.names)

    def _is_frame(self, obj: Any) -> bool:
        """Return True when ``obj`` is a Snowpark DataFrame."""
        return isinstance(obj, SNDF)

    def _frame_name(self) -> str:
        """Return the human-readable frame type name."""
        return "Snowpark"

    # ---- SQL hooks ----
    def _format_relation_for_ref(self, name: str) -> str:
        """Return the qualified relation that a ``ref(name)`` resolves to."""
        return self._qualified(relation_for(name))

    def _format_source_reference(
        self, cfg: dict[str, Any], source_name: str, table_name: str
    ) -> str:
        """Return the quoted three-part name for a declared source.

        Database falls back from ``cfg["database"]`` to ``cfg["catalog"]`` to
        the executor's database; schema falls back to the executor's schema.

        Raises:
            NotImplementedError: If the source is path-based (``location``).
            KeyError: If the identifier, database or schema cannot be resolved.
        """
        if cfg.get("location"):
            raise NotImplementedError("Snowflake executor does not support path-based sources.")

        ident = cfg.get("identifier")
        if not ident:
            raise KeyError(f"Source {source_name}.{table_name} missing identifier")

        db = cfg.get("database") or cfg.get("catalog") or self.database
        sch = cfg.get("schema") or self.schema
        if not db or not sch:
            raise KeyError(
                f"Source {source_name}.{table_name} missing database/schema for Snowflake"
            )
        return f"{self._q(db)}.{self._q(sch)}.{self._q(ident)}"

    def _create_or_replace_view(self, target_sql: str, select_body: str, node: Node) -> None:
        """Create or replace the view ``target_sql`` (already qualified) from ``select_body``."""
        self.session.sql(f"CREATE OR REPLACE VIEW {target_sql} AS {select_body}").collect()

    def _create_or_replace_table(self, target_sql: str, select_body: str, node: Node) -> None:
        """Create or replace the table ``target_sql`` (already qualified) from ``select_body``."""
        self.session.sql(f"CREATE OR REPLACE TABLE {target_sql} AS {select_body}").collect()

    def _create_or_replace_view_from_table(
        self, view_name: str, backing_table: str, node: Node
    ) -> None:
        """Create or replace ``view_name`` as ``SELECT *`` over ``backing_table``.

        Same statement as ``_create_view_over_table``; delegates to it so the
        SQL lives in one place.
        """
        self._create_view_over_table(view_name, backing_table, node)

    # ---- Meta hook ----
    def on_node_built(self, node: Node, relation: str, fingerprint: str) -> None:
        """After successful materialization, upsert _ff_meta (best-effort).

        Failures never propagate; they are only reported on the debug log so a
        metadata hiccup cannot fail an otherwise successful build.
        """
        try:
            ensure_meta_table(self)
            upsert_meta(self, node.name, relation, fingerprint, "snowflake_snowpark")
        except Exception:
            import logging

            logging.getLogger(__name__).debug(
                "Skipping _ff_meta upsert for %r", getattr(node, "name", node), exc_info=True
            )

    # ── Incremental API (parity with DuckDB/PG) ──────────────────────────
    def exists_relation(self, relation: str) -> bool:
        """Check existence via information_schema.tables.

        The schema is matched exactly and the table name case-insensitively.
        Any query error is treated as "does not exist".
        """
        db = self._q(self.database)
        # Values compared against information_schema columns must be string
        # literals; a double-quoted token would be parsed as a column name.
        q = f"""
          select 1
          from {db}.information_schema.tables
          where table_schema = {self._lit(self.schema)}
            and lower(table_name) = lower({self._lit(relation)})
          limit 1
        """
        try:
            return bool(self.session.sql(q).collect())
        except Exception:
            return False

    def create_table_as(self, relation: str, select_sql: str) -> None:
        """Create or replace ``relation`` from the SELECT body of ``select_sql``."""
        body = self._first_select_body(select_sql).strip().rstrip(";\n\t ")
        self.session.sql(f"CREATE OR REPLACE TABLE {self._qualified(relation)} AS {body}").collect()

    def incremental_insert(self, relation: str, select_sql: str) -> None:
        """Append the rows produced by ``select_sql`` to ``relation``."""
        body = self._first_select_body(select_sql).strip().rstrip(";\n\t ")
        self.session.sql(f"INSERT INTO {self._qualified(relation)} {body}").collect()

    def incremental_merge(self, relation: str, select_sql: str, unique_key: list[str]) -> None:
        """
        Portable fallback without explicit column list, run as two statements:
          1) DELETE FROM <rel> AS t USING (<body>) AS s WHERE <key match>
          2) INSERT INTO <rel> SELECT * FROM (<body>)
        This avoids Snowflake MERGE column listing complexity.

        Each statement is sent separately: a single call carries one statement,
        and a CTE defined for the DELETE would not be visible to the INSERT.
        The SELECT body is therefore evaluated twice. With an empty
        ``unique_key`` nothing can match, so only the INSERT is issued.
        """
        body = self._first_select_body(select_sql).strip().rstrip(";\n\t ")
        qrel = self._qualified(relation)
        if unique_key:
            pred = " AND ".join(f"t.{k}=s.{k}" for k in unique_key)
            self.session.sql(
                f"DELETE FROM {qrel} AS t USING ({body}) AS s WHERE {pred}"
            ).collect()
        self.session.sql(f"INSERT INTO {qrel} SELECT * FROM ({body}) AS src").collect()

    def alter_table_sync_schema(
        self, relation: str, select_sql: str, *, mode: str = "append_new_columns"
    ) -> None:
        """
        Best-effort additive schema sync:
          - read the table's current columns from information_schema.columns
          - infer the SELECT schema via a zero-row probe (WHERE 1=0)
          - add missing columns as STRING

        Modes other than "append_new_columns" / "sync_all_columns" are a no-op.
        Both supported modes only add columns; nothing is dropped or retyped.
        """
        if mode not in {"append_new_columns", "sync_all_columns"}:
            return
        qrel = self._qualified(relation)
        try:
            # String literals (not quoted identifiers) are required here,
            # otherwise the lookup fails and every column looks "missing".
            rows = self.session.sql(
                f"""
                select column_name
                from {self._q(self.database)}.information_schema.columns
                where table_schema = {self._lit(self.schema)}
                  and lower(table_name) = lower({self._lit(relation)})
                """
            ).collect()
            existing = {r[0] for r in rows}
        except Exception:
            existing = set()
        # Probe SELECT columns
        body = self._first_select_body(select_sql).strip().rstrip(";\n\t ")
        probe = self.session.sql(f"SELECT * FROM ({body}) q WHERE 1=0")
        probe_cols = list(probe.schema.names)
        to_add = [c for c in probe_cols if c not in existing]
        if not to_add:
            return
        cols_sql = ", ".join(f"{self._q(c)} STRING" for c in to_add)
        self.session.sql(f"ALTER TABLE {qrel} ADD COLUMN {cols_sql}").collect()

ENGINE_NAME `class-attribute` `instance-attribute` ¶

ENGINE_NAME = 'snowflake_snowpark'

Snowflake executor operating on Snowpark DataFrames (no pandas).

on_node_built ¶

on_node_built(node, relation, fingerprint)

After successful materialization, upsert _ff_meta (best-effort).

Source code in src/fastflowtransform/executors/snowflake_snowpark_exec.py

def on_node_built(self, node: Node, relation: str, fingerprint: str) -> None:
    """After successful materialization, upsert _ff_meta (best-effort).

    Failures never propagate; they are only reported on the debug log so a
    metadata hiccup cannot fail an otherwise successful build.
    """
    try:
        ensure_meta_table(self)
        upsert_meta(self, node.name, relation, fingerprint, "snowflake_snowpark")
    except Exception:
        import logging

        logging.getLogger(__name__).debug(
            "Skipping _ff_meta upsert for %r", getattr(node, "name", node), exc_info=True
        )

exists_relation ¶

exists_relation(relation)

Check existence via information_schema.tables.

Source code in src/fastflowtransform/executors/snowflake_snowpark_exec.py

def exists_relation(self, relation: str) -> bool:
    """Check existence via information_schema.tables.

    The schema is matched exactly and the table name case-insensitively.
    Any query error is treated as "does not exist".
    """

    def lit(value: str) -> str:
        # SQL string literal. Values compared against information_schema
        # columns must be single-quoted; a double-quoted token would be
        # parsed as a column name. Backslash is escaped because Snowflake
        # honours backslash escapes inside single-quoted constants.
        return "'" + value.replace("\\", "\\\\").replace("'", "''") + "'"

    db = self._q(self.database)
    q = f"""
      select 1
      from {db}.information_schema.tables
      where table_schema = {lit(self.schema)}
        and lower(table_name) = lower({lit(relation)})
      limit 1
    """
    try:
        return bool(self.session.sql(q).collect())
    except Exception:
        return False

incremental_merge ¶

incremental_merge(relation, select_sql, unique_key)

Portable fallback without explicit column list

WITH src AS (<body>)
DELETE ... USING src ...
INSERT ... SELECT * FROM src

This avoids Snowflake MERGE column listing complexity.

Source code in src/fastflowtransform/executors/snowflake_snowpark_exec.py

def incremental_merge(self, relation: str, select_sql: str, unique_key: list[str]) -> None:
    """
    Portable fallback without explicit column list, run as two statements:
      1) DELETE FROM <rel> AS t USING (<body>) AS s WHERE <key match>
      2) INSERT INTO <rel> SELECT * FROM (<body>)
    This avoids Snowflake MERGE column listing complexity.

    Each statement is sent separately: a single call carries one statement,
    and a CTE defined for the DELETE would not be visible to the INSERT.
    The SELECT body is therefore evaluated twice. With an empty
    ``unique_key`` nothing can match, so only the INSERT is issued.
    """
    body = self._first_select_body(select_sql).strip().rstrip(";\n\t ")
    qrel = self._qualified(relation)
    if unique_key:
        pred = " AND ".join(f"t.{k}=s.{k}" for k in unique_key)
        self.session.sql(
            f"DELETE FROM {qrel} AS t USING ({body}) AS s WHERE {pred}"
        ).collect()
    self.session.sql(f"INSERT INTO {qrel} SELECT * FROM ({body}) AS src").collect()

alter_table_sync_schema ¶

alter_table_sync_schema(relation, select_sql, *, mode='append_new_columns')

Best-effort additive schema sync

infer SELECT schema via a zero-row probe (WHERE 1=0)
add missing columns as STRING

Source code in src/fastflowtransform/executors/snowflake_snowpark_exec.py

def alter_table_sync_schema(
    self, relation: str, select_sql: str, *, mode: str = "append_new_columns"
) -> None:
    """
    Best-effort additive schema sync:
      - read the table's current columns from information_schema.columns
      - infer the SELECT schema via a zero-row probe (WHERE 1=0)
      - add missing columns as STRING

    Modes other than "append_new_columns" / "sync_all_columns" are a no-op.
    Both supported modes only add columns; nothing is dropped or retyped.
    """
    if mode not in {"append_new_columns", "sync_all_columns"}:
        return

    def lit(value: str) -> str:
        # SQL string literal. Values compared against information_schema
        # columns must be single-quoted; a double-quoted token would be
        # parsed as a column name, the lookup would fail, and every column
        # would look "missing".
        return "'" + value.replace("\\", "\\\\").replace("'", "''") + "'"

    qrel = self._qualified(relation)
    try:
        rows = self.session.sql(
            f"""
            select column_name
            from {self._q(self.database)}.information_schema.columns
            where table_schema = {lit(self.schema)}
              and lower(table_name) = lower({lit(relation)})
            """
        ).collect()
        existing = {r[0] for r in rows}
    except Exception:
        existing = set()
    # Probe SELECT columns
    body = self._first_select_body(select_sql).strip().rstrip(";\n\t ")
    probe = self.session.sql(f"SELECT * FROM ({body}) q WHERE 1=0")
    probe_cols = list(probe.schema.names)
    to_add = [c for c in probe_cols if c not in existing]
    if not to_add:
        return
    cols_sql = ", ".join(f"{self._q(c)} STRING" for c in to_add)
    self.session.sql(f"ALTER TABLE {qrel} ADD COLUMN {cols_sql}").collect()

run_sql ¶

run_sql(node, env)

Orchestrate SQL models

1) Render Jinja (ref/source/this) and strip leading {{ config(...) }}. 2) If the SQL is full DDL (CREATE …), execute it verbatim (passthrough). 3) Otherwise, normalize to CREATE OR REPLACE {TABLE|VIEW} AS <body>. The body is CTE-aware (keeps WITH … SELECT … intact).

On failure, raise ModelExecutionError with a helpful snippet.

Source code in src/fastflowtransform/executors/base.py

def run_sql(self, node: Node, env: Environment) -> None:
    """
    Execute a SQL model end to end.

    Steps:
      1) Render the template (ref/source resolved through this executor) and
         strip a leading {{ config(...) }} block.
      2) Ephemeral models stop here; nothing is materialized.
      3) SQL that already looks like full DDL is executed verbatim. If the
         engine reports NotImplementedError for direct DDL, continue with
         step 4 instead.
      4) Otherwise hand the selectable body (WITH … SELECT … kept intact) to
         the engine's materialization hook for the configured kind
         (default "table").

    Raises:
        ModelExecutionError: wraps any failure from the direct-DDL or the
            materialization step, carrying the offending SQL as a snippet.
    """

    def resolve_ref(name):
        return self._resolve_ref(name, env)

    rendered = self.render_sql(
        node,
        env,
        ref_resolver=resolve_ref,
        source_resolver=self._resolve_source,
    )
    sql = self._strip_leading_config(rendered).strip()

    node_meta = node.meta or {}
    materialization = node_meta.get("materialized", "table")
    if materialization == "ephemeral":
        return

    # Direct DDL passthrough (CREATE [OR REPLACE] {TABLE|VIEW} …)
    if self._looks_like_direct_ddl(sql):
        try:
            self._execute_sql_direct(sql, node)
        except NotImplementedError:
            # Engine has no direct-DDL support: use the normalized path below.
            pass
        except Exception as e:
            raise ModelExecutionError(
                node_name=node.name,
                relation=relation_for(node.name),
                message=str(e),
                sql_snippet=sql,
            ) from e
        else:
            return

    # Normalized materialization path (CTE-safe body)
    body = self._selectable_body(sql).rstrip(" ;\n\t")
    target_sql = self._format_relation_for_ref(node.name)

    # Centralized SQL preview logging (applies to ALL engines)
    echo_debug(
        f"=== MATERIALIZE ===\n"
        f"-- model: {node.name}\n"
        f"-- materialized: {materialization}\n"
        f"-- target: {target_sql}\n"
        f"{body}\n"
    )

    try:
        self._apply_sql_materialization(node, target_sql, body, materialization)
    except Exception as e:
        snippet = f"-- materialized={materialization}\n-- target={target_sql}\n{body}"
        raise ModelExecutionError(
            node_name=node.name,
            relation=relation_for(node.name),
            message=str(e),
            sql_snippet=snippet,
        ) from e

fastflowtransform.executors.snowflake_snowpark_exec¶

SnowflakeSnowparkExecutor ¶

ENGINE_NAME class-attribute instance-attribute ¶

on_node_built ¶

exists_relation ¶

incremental_merge ¶

alter_table_sync_schema ¶

run_sql ¶

ENGINE_NAME `class-attribute` `instance-attribute` ¶