fastflowtransform.artifacts¶

write_manifest ¶

write_manifest(project_dir)

Write manifest.json with minimal compatibility:

- nodes: {name, path, deps, materialized, relation, kind}
- macros: {name -> path}
- sources: verbatim REGISTRY.sources
- generated_at

Source code in src/fastflowtransform/artifacts.py

def write_manifest(project_dir: Path) -> Path:
    """
    Emit ``manifest.json`` into the directory returned by ``_target_dir``.

    The document contains:
      - metadata: tool name and the ``generated_at`` timestamp
      - nodes: {name, kind, path, deps, materialized, relation} per registered node
      - macros: {name -> path}
      - sources: REGISTRY.sources passed through verbatim (``{}`` when falsy)

    Returns the path of the manifest file that was written.
    """
    root = Path(project_dir)
    destination = _target_dir(root) / "manifest.json"

    # Entries are emitted in name order so the output is deterministic.
    node_entries = {
        node_name: {
            "name": node_name,
            "kind": spec.kind,
            # Stubbed Nodes in tests may lack a path; fall back to None.
            "path": _rel_safe(getattr(spec, "path", None), root),
            "deps": sorted(spec.deps or []),
            # Nodes without an explicit materialization default to "table".
            "materialized": (spec.meta or {}).get("materialized", "table"),
            "relation": relation_for(node_name),
        }
        for node_name, spec in sorted(
            REGISTRY.nodes.items(), key=lambda pair: pair[0]
        )
    }

    macro_entries = {
        macro_name: _rel_safe(macro_path, root)
        for macro_name, macro_path in sorted(
            REGISTRY.macros.items(), key=lambda pair: pair[0]
        )
    }

    payload = {
        "metadata": {
            "tool": "fastflowtransform",
            "generated_at": _iso_now(),
        },
        "nodes": node_entries,
        "macros": macro_entries,
        "sources": REGISTRY.sources or {},
    }
    _json_dump(destination, payload)
    return destination

write_run_results ¶

write_run_results(project_dir, *, started_at, finished_at, node_results, budgets=None)

Write run_results.json containing run envelope and per-node results. Optionally includes a 'budgets' summary block.

Source code in src/fastflowtransform/artifacts.py

def write_run_results(
    project_dir: Path,
    *,
    started_at: str,
    finished_at: str,
    node_results: list[RunNodeResult],
    budgets: dict[str, Any] | None = None,
) -> Path:
    """
    Emit ``run_results.json`` into the directory returned by ``_target_dir``.

    The document holds a metadata block, the run's start/finish values as
    passed in, and one entry per node result (converted with ``asdict`` and
    ordered by node name). A ``budgets`` key is added only when ``budgets``
    is not None.

    Returns the path of the file that was written.
    """
    destination = _target_dir(Path(project_dir)) / "run_results.json"

    metadata = {"tool": "fastflowtransform", "generated_at": _iso_now()}
    # Sort by name so repeated runs produce results in a stable order.
    ordered_results = sorted(node_results, key=lambda result: result.name)

    payload: dict[str, Any] = {
        "metadata": metadata,
        "run_started_at": started_at,
        "run_finished_at": finished_at,
        "results": [asdict(result) for result in ordered_results],
    }
    if budgets is not None:
        payload["budgets"] = budgets

    _json_dump(destination, payload)
    return destination

write_catalog ¶

write_catalog(project_dir, executor)

Write catalog.json:

- relations: map of relation -> {columns: [{name, dtype, nullable}]}

Source code in src/fastflowtransform/artifacts.py

def write_catalog(project_dir: Path, executor: Any) -> Path:
    """
    Emit ``catalog.json`` into the directory returned by ``_target_dir``.

    The document contains:
      - metadata: tool name and the ``generated_at`` timestamp
      - relations: map of relation -> {columns: [...]}, where the columns come
        from ``_try_columns_for(executor, relation)``

    Returns the path of the catalog file that was written.
    """
    root = Path(project_dir)
    destination = _target_dir(root) / "catalog.json"

    def _columns_or_empty(relation: str) -> Any:
        # Introspection must never break artifact emission, so any failure
        # for a single relation degrades to an empty column list.
        try:
            return _try_columns_for(executor, relation)
        except Exception:
            return []

    relation_names = sorted(relation_for(node_name) for node_name in REGISTRY.nodes)
    relations: dict[str, Any] = {
        relation: {"columns": _columns_or_empty(relation)}
        for relation in relation_names
    }

    payload = {
        "metadata": {"tool": "fastflowtransform", "generated_at": _iso_now()},
        "relations": relations,
    }
    _json_dump(destination, payload)
    return destination

load_last_run_durations ¶

load_last_run_durations(project_dir)

Best-effort reader for the last run_results.json.

Returns: { model_name: duration_in_seconds }. On any error or missing file: {}.

Source code in src/fastflowtransform/artifacts.py

def load_last_run_durations(project_dir: Path) -> dict[str, float]:
    """
    Best-effort reader for the last run_results.json.

    Reads ``run_results.json`` from the directory returned by ``_target_dir``
    (the same location ``write_run_results`` writes to) and extracts each
    entry's ``name`` and ``duration_ms``.

    Returns: { model_name: duration_in_seconds }.
    On any error, missing file, or unexpected document shape: {}.
    """
    # Point at the results file itself; the bare target directory can never be
    # parsed as JSON, which would make this function always return {}.
    results_path = _target_dir(Path(project_dir)) / "run_results.json"
    if not results_path.is_file():
        return {}

    # Deliberately broad: this reader is advisory and must never fail a run
    # because of an unreadable or corrupt artifact.
    try:
        raw = json.loads(results_path.read_text(encoding="utf-8"))
    except Exception:
        return {}

    # A valid JSON document is not necessarily an object (could be a list,
    # string, number, ...); anything else has no results to offer.
    if not isinstance(raw, dict):
        return {}

    # tolerate a few possible shapes
    items = raw.get("results") or raw.get("node_results") or raw.get("nodes") or []
    if not isinstance(items, list):
        return {}

    durations: dict[str, float] = {}
    for item in items:
        if not isinstance(item, dict):
            continue
        name = item.get("name")
        dur_ms = item.get("duration_ms")
        # bool is a subclass of int; a JSON true/false is not a duration.
        if (
            isinstance(name, str)
            and isinstance(dur_ms, (int, float))
            and not isinstance(dur_ms, bool)
        ):
            durations[name] = float(dur_ms) / 1000.0
    return durations