Skip to content

fastflowtransform.executors.budget.runtime.bigquery

BigQueryBudgetRuntime

Bases: BaseBudgetRuntime[BigQueryBudgetExecutor]

BigQuery budget runtime using dry-run estimation.

Source code in src/fastflowtransform/executors/budget/runtime/bigquery.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
class BigQueryBudgetRuntime(BaseBudgetRuntime[BigQueryBudgetExecutor]):
    """Budget runtime for BigQuery that sizes queries via dry-run jobs."""

    # Guard settings for this engine: the byte limit is named by the
    # FF_BQ_MAX_BYTES environment variable, and the estimator is looked up
    # under the attribute name given in ``estimator_attr``.
    DEFAULT_GUARD = BudgetGuard(
        env_var="FF_BQ_MAX_BYTES",
        estimator_attr="runtime_budget_estimate_query_bytes",
        engine_label="BigQuery",
        what="query",
    )

    def estimate_query_bytes(self, sql: str) -> int | None:
        """
        Return the number of bytes a dry-run reports for ``sql``.

        The statement is submitted as a dry-run job with the query cache
        disabled. When the executor has a dataset configured, it is used as
        the job's default dataset.

        Returns:
            The reported ``total_bytes_processed`` as an int (0 when the job
            exposes no value or a falsy one), or None when submitting the
            job, waiting on it, or converting the value raises.
        """
        dry_run_config = bigquery.QueryJobConfig(
            dry_run=True,
            use_query_cache=False,
        )

        runner = self.executor
        dataset_name = runner.dataset
        if dataset_name:
            dry_run_config.default_dataset = bigquery.DatasetReference(
                runner.project, dataset_name
            )

        # Best-effort: any failure while submitting or waiting means
        # "no estimate available", not an error for the caller.
        try:
            dry_run_job = runner.client.query(
                sql,
                job_config=dry_run_config,
                location=runner.location,
            )
            dry_run_job.result()
        except Exception:
            return None

        try:
            reported = getattr(dry_run_job, "total_bytes_processed", 0)
            # A missing or falsy value is reported as 0 bytes.
            return int(reported) if reported else 0
        except Exception:
            return None

estimate_query_bytes

estimate_query_bytes(sql)

Estimate bytes for a BigQuery SQL statement using a dry-run.

Returns the estimated bytes, or None if estimation is not possible.

Source code in src/fastflowtransform/executors/budget/runtime/bigquery.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def estimate_query_bytes(self, sql: str) -> int | None:
    """
    Return the number of bytes a dry-run reports for ``sql``.

    The statement is submitted as a dry-run job with the query cache
    disabled. When the executor has a dataset configured, it is used as
    the job's default dataset.

    Returns:
        The reported ``total_bytes_processed`` as an int (0 when the job
        exposes no value or a falsy one), or None when submitting the
        job, waiting on it, or converting the value raises.
    """
    dry_run_config = bigquery.QueryJobConfig(
        dry_run=True,
        use_query_cache=False,
    )

    runner = self.executor
    dataset_name = runner.dataset
    if dataset_name:
        dry_run_config.default_dataset = bigquery.DatasetReference(
            runner.project, dataset_name
        )

    # Best-effort: any failure while submitting or waiting means
    # "no estimate available", not an error for the caller.
    try:
        dry_run_job = runner.client.query(
            sql,
            job_config=dry_run_config,
            location=runner.location,
        )
        dry_run_job.result()
    except Exception:
        return None

    try:
        reported = getattr(dry_run_job, "total_bytes_processed", 0)
        # A missing or falsy value is reported as 0 bytes.
        return int(reported) if reported else 0
    except Exception:
        return None