Skip to content

fastflowtransform.executors.bigquery._bigquery_mixin

BigQueryIdentifierMixin

Bases: SqlIdentifierMixin

Mixin that provides common BigQuery helpers (identifier quoting, dataset creation). Subclasses are expected to define self.project, self.dataset, and self.client.

Source code in src/fastflowtransform/executors/bigquery/_bigquery_mixin.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class BigQueryIdentifierMixin(SqlIdentifierMixin):
    """
    Mixin that provides common BigQuery helpers (identifier quoting, dataset creation).

    Subclasses are expected to define ``self.project``, ``self.dataset`` and
    ``self.client``. ``_ensure_dataset`` additionally reads the optional
    attributes ``allow_create_dataset`` and ``location`` via ``getattr``.
    """

    # Default project; used as the catalog part of qualified names.
    project: str
    # Default dataset; used as the schema part of qualified names.
    dataset: str
    # Client used to look up and create the dataset.
    client: bigquery.Client

    def _bq_quote(self, value: str) -> str:
        """
        Escape ``value`` for use inside a backtick-quoted BigQuery identifier.

        Backslashes are doubled and backticks are backslash-escaped. The
        surrounding backticks are not added here; callers wrap the result.
        """
        # Backslashes must be escaped first: quoted identifiers honour
        # backslash escape sequences, so a raw backslash would otherwise be
        # read as the start of an escape (and a trailing one would swallow
        # the closing backtick). Doing it first also avoids re-escaping the
        # backslashes introduced for backticks.
        return value.replace("\\", "\\\\").replace("`", "\\`")

    def _quote_identifier(self, ident: str) -> str:
        """Return ``ident`` escaped via ``_bq_quote`` (no backticks are added)."""
        return self._bq_quote(ident)

    def _default_schema(self) -> str | None:
        """Return the dataset used when no schema is given explicitly."""
        return self.dataset

    def _default_catalog(self) -> str | None:
        """Return the project used when no catalog is given explicitly."""
        return self.project

    def _should_include_catalog(
        self, catalog: str | None, schema: str | None, *, explicit: bool
    ) -> bool:
        """Always return ``True``; the arguments are ignored."""
        # BigQuery always expects a project + dataset.
        return True

    def _qualify_identifier(
        self,
        ident: str,
        *,
        schema: str | None = None,
        catalog: str | None = None,
        quote: bool = True,
    ) -> str:
        """
        Build a dotted ``project.dataset.identifier`` name.

        Args:
            ident: Relation name; passed through ``self._normalize_identifier``.
            schema: Dataset override; falls back to ``_default_schema()`` when
                ``self._clean_part`` yields a falsy value.
            catalog: Project override; falls back to ``_default_catalog()``
                when ``self._clean_part`` yields a falsy value.
            quote: When true, each part is escaped with ``_bq_quote`` and the
                whole dotted path is wrapped in a single pair of backticks.
                When false, the parts are joined with dots as-is.

        Returns:
            The qualified name. Falsy parts are omitted from the path.
        """
        # NOTE(review): _clean_part and _normalize_identifier are not defined
        # in this class; presumably inherited from SqlIdentifierMixin.
        proj = self._clean_part(catalog) or self._default_catalog()
        dset = self._clean_part(schema) or self._default_schema()
        normalized = self._normalize_identifier(ident)
        parts = [proj, dset, normalized]
        if not quote:
            return ".".join(p for p in parts if p)
        return f"`{'.'.join(self._bq_quote(p) for p in parts if p)}`"

    def _qualified_identifier(
        self, relation: str, project: str | None = None, dataset: str | None = None
    ) -> str:
        """
        Return the quoted, fully qualified name for ``relation``.

        Thin wrapper over ``_qualify_identifier`` that maps ``project`` to
        ``catalog`` and ``dataset`` to ``schema``.
        """
        return self._qualify_identifier(relation, schema=dataset, catalog=project)

    def _ensure_dataset(self) -> None:
        """
        Make sure the dataset ``<project>.<dataset>`` exists.

        If the lookup succeeds, nothing else happens. If it raises
        ``NotFound``, the dataset is created only when the instance has a
        truthy ``allow_create_dataset`` attribute; otherwise the ``NotFound``
        error is re-raised. A truthy ``location`` attribute, when present, is
        applied to the new dataset.

        Raises:
            NotFound: The dataset does not exist and creation is not allowed.
        """
        ds_id = f"{self.project}.{self.dataset}"
        try:
            self.client.get_dataset(ds_id)
            return
        except NotFound:
            # Creation is opt-in; the attribute is optional on subclasses.
            if not getattr(self, "allow_create_dataset", False):
                raise

        ds_obj = bigquery.Dataset(ds_id)
        if getattr(self, "location", None):
            ds_obj.location = self.location  # type: ignore[attr-defined]
        # exists_ok=True: do not fail if the dataset already exists by the
        # time the create call is made.
        self.client.create_dataset(ds_obj, exists_ok=True)