Skip to content

Commit 8932af9

Browse files
committed
Fix for_table to use caching
1 parent cc1c4ff commit 8932af9

File tree

3 files changed

+30
-18
lines changed

3 files changed

+30
-18
lines changed

bigquery_etl/dryrun.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pickle
1919
import random
2020
import re
21+
import shutil
2122
import sys
2223
import tempfile
2324
import time
@@ -198,6 +199,17 @@ def skipped_files(sql_dir=ConfigLoader.get("default", "sql_dir")) -> Set[str]:
198199

199200
return skip_files
200201

202+
@staticmethod
203+
def clear_cache():
204+
"""Clear dry run cache directory."""
205+
cache_dir = os.path.join(tempfile.gettempdir(), "bigquery_etl_dryrun_cache")
206+
if os.path.exists(cache_dir):
207+
try:
208+
shutil.rmtree(cache_dir)
209+
print(f"Cleared dry run cache at {cache_dir}")
210+
except OSError as e:
211+
print(f"Warning: Failed to clear dry run cache: {e}")
212+
201213
def skip(self):
202214
"""Determine if dry run should be skipped."""
203215
return self.respect_skip and self.sqlfile in self.skipped_files(
@@ -470,8 +482,9 @@ def dry_run_result(self):
470482

471483
self.dry_run_duration = time.time() - start_time
472484

473-
# Save to cache (if caching is enabled)
474-
if self.use_cache:
485+
# Save to cache (if caching is enabled and result is valid)
486+
# Don't cache errors to allow retries
487+
if self.use_cache and result.get("valid"):
475488
self._save_cached_result(cache_key, result)
476489

477490
return result

bigquery_etl/schema/__init__.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from google.cloud.bigquery import SchemaField
1414

1515
from .. import dryrun
16+
from ..config import ConfigLoader
1617

1718
SCHEMA_FILE = "schema.yaml"
1819

@@ -58,24 +59,34 @@ def from_json(cls, json_schema):
5859
return cls(json_schema)
5960

6061
@classmethod
61-
def for_table(cls, project, dataset, table, partitioned_by=None, *args, **kwargs):
62+
def for_table(
63+
cls,
64+
project,
65+
dataset,
66+
table,
67+
partitioned_by=None,
68+
filename="query.sql",
69+
*args,
70+
**kwargs,
71+
):
6272
"""Get the schema for a BigQuery table."""
6373
query = f"SELECT * FROM `{project}.{dataset}.{table}`"
6474

6575
if partitioned_by:
6676
query += f" WHERE DATE(`{partitioned_by}`) = DATE('2020-01-01')"
6777

6878
try:
79+
sql_dir = ConfigLoader.get("default", "sql_dir")
6980
return cls(
7081
dryrun.DryRun(
71-
os.path.join(project, dataset, table, "query.sql"),
82+
os.path.join(sql_dir, project, dataset, table, filename),
7283
query,
7384
project=project,
7485
dataset=dataset,
7586
table=table,
7687
*args,
7788
**kwargs,
78-
).get_schema()
89+
).get_table_schema()
7990
)
8091
except Exception as e:
8192
print(f"Cannot get schema for {project}.{dataset}.{table}: {e}")

bigquery_etl/schema/stable_table_schema.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import json
44
import os
55
import pickle
6-
import shutil
76
import tarfile
87
import tempfile
98
import urllib.request
@@ -52,17 +51,6 @@ def sortkey(self):
5251
)
5352

5453

55-
def _clear_dryrun_cache():
56-
"""Clear dry run cache when new schemas are downloaded."""
57-
cache_dir = os.path.join(tempfile.gettempdir(), "bigquery_etl_dryrun_cache")
58-
if os.path.exists(cache_dir):
59-
try:
60-
shutil.rmtree(cache_dir)
61-
print(f"Cleared dry run cache at {cache_dir}")
62-
except OSError as e:
63-
print(f"Warning: Failed to clear dry run cache: {e}")
64-
65-
6654
def prod_schemas_uri():
6755
"""Return URI for the schemas tarball deployed to shared-prod.
6856
@@ -105,7 +93,7 @@ def get_stable_table_schemas() -> List[SchemaFile]:
10593

10694
# Clear dry run cache when downloading new schemas
10795
# Schema changes could affect dry run results
108-
_clear_dryrun_cache()
96+
DryRun.clear_cache()
10997

11098
with urllib.request.urlopen(schemas_uri) as f:
11199
tarbytes = BytesIO(f.read())

0 commit comments

Comments (0)