 import pickle
 import random
 import re
+import shutil
 import sys
 import tempfile
 import time
@@ -198,6 +199,17 @@ def skipped_files(sql_dir=ConfigLoader.get("default", "sql_dir")) -> Set[str]:

         return skip_files

+    @staticmethod
+    def clear_cache():
+        """Clear dry run cache directory."""
+        cache_dir = Path(tempfile.gettempdir()) / "bigquery_etl_dryrun_cache"
+        if cache_dir.exists():
+            try:
+                shutil.rmtree(cache_dir)
+                print(f"Cleared dry run cache at {cache_dir}")
+            except OSError as e:
+                print(f"Warning: Failed to clear dry run cache: {e}")
+
     def skip(self):
         """Determine if dry run should be skipped."""
         return self.respect_skip and self.sqlfile in self.skipped_files(
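For context on the hunk above: clear_cache() deletes the whole cache directory under the system temp dir, so the next dry run starts cold. A minimal usage sketch, assuming the enclosing class is DryRun from bigquery_etl.dryrun (the import path is an assumption, not shown in this diff):

    # assumes the new staticmethod lives on the DryRun class
    from bigquery_etl.dryrun import DryRun

    DryRun.clear_cache()  # removes <tempdir>/bigquery_etl_dryrun_cache if present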
@@ -241,41 +253,52 @@ def _get_cached_result(self, cache_key, ttl_seconds=None):
         if ttl_seconds is None:
             ttl_seconds = ConfigLoader.get("dry_run", "cache_ttl_seconds", fallback=900)

-        cache_dir = os.path.join(tempfile.gettempdir(), "bigquery_etl_dryrun_cache")
-        os.makedirs(cache_dir, exist_ok=True)
-        cache_file = os.path.join(cache_dir, f"dryrun_{cache_key}.pkl")
+        cache_dir = Path(tempfile.gettempdir()) / "bigquery_etl_dryrun_cache"
+        cache_dir.mkdir(parents=True, exist_ok=True)
+        cache_file = cache_dir / f"dryrun_{cache_key}.pkl"
+
+        try:
+            if not cache_file.exists():
+                return None

-        if os.path.exists(cache_file):
             # check if cache is expired
-            file_age = time.time() - os.path.getmtime(cache_file)
+            file_age = time.time() - cache_file.stat().st_mtime
             if file_age > ttl_seconds:
                 try:
-                    os.remove(cache_file)
+                    cache_file.unlink()
                 except OSError:
                     pass
                 return None

+            cached_data = pickle.loads(cache_file.read_bytes())
+            cache_age = time.time() - cache_file.stat().st_mtime
+            print(f"[DRYRUN CACHE HIT] {self.sqlfile} (age: {cache_age:.0f}s)")
+            return cached_data
+        except (pickle.PickleError, EOFError, OSError, FileNotFoundError) as e:
+            print(f"[DRYRUN CACHE] Failed to load cache: {e}")
             try:
-                with open(cache_file, "rb") as f:
-                    cached_data = pickle.load(f)
-                cache_age = time.time() - os.path.getmtime(cache_file)
-                print(f"[DRYRUN CACHE HIT] {self.sqlfile} (age: {cache_age:.0f}s)")
-                return cached_data
-            except (pickle.PickleError, EOFError, OSError) as e:
-                print(f"[DRYRUN CACHE] Failed to load cache: {e}")
-                return None
-
-        return None
+                if cache_file.exists():
+                    cache_file.unlink()
+            except OSError:
+                pass
+            return None

     def _save_cached_result(self, cache_key, result):
-        """Save dry run result to disk cache."""
-        cache_dir = os.path.join(tempfile.gettempdir(), "bigquery_etl_dryrun_cache")
-        os.makedirs(cache_dir, exist_ok=True)
-        cache_file = os.path.join(cache_dir, f"dryrun_{cache_key}.pkl")
+        """Save dry run result to disk cache using atomic write."""
+        cache_dir = Path(tempfile.gettempdir()) / "bigquery_etl_dryrun_cache"
+        cache_dir.mkdir(parents=True, exist_ok=True)
+        cache_file = cache_dir / f"dryrun_{cache_key}.pkl"

         try:
-            with open(cache_file, "wb") as f:
+            # write to temporary file first, then atomically rename
+            # this prevents race conditions where readers get partial files
+            temp_file = Path(str(cache_file) + f".tmp.{os.getpid()}")
+            with open(temp_file, "wb") as f:
                 pickle.dump(result, f)
+                f.flush()
+                os.fsync(f.fileno())  # Ensure data is written to disk
+
+            temp_file.replace(cache_file)

         # save table metadata separately if present
         if (
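The save path above writes to a process-specific temp file, flushes and fsyncs it, then renames it over the real cache file. Path.replace() maps to os.replace(), which swaps the target atomically on the same filesystem, so a concurrent reader sees either the old complete pickle or the new one, never a torn write. A self-contained sketch of the same pattern (the helper name is illustrative, not part of this change):

    import os
    import pickle
    from pathlib import Path

    def atomic_pickle_write(path: Path, obj) -> None:
        # hypothetical helper mirroring the write-temp-then-rename pattern above
        tmp = Path(f"{path}.tmp.{os.getpid()}")
        with open(tmp, "wb") as f:
            pickle.dump(obj, f)
            f.flush()
            os.fsync(f.fileno())  # ensure bytes reach disk before the rename
        tmp.replace(path)  # atomic replacement on the same filesystem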
@@ -291,49 +314,73 @@ def _save_cached_result(self, cache_key, result):
                )
         except (pickle.PickleError, OSError) as e:
             print(f"[DRYRUN CACHE] Failed to save cache: {e}")
+            try:
+                temp_file = Path(str(cache_file) + f".tmp.{os.getpid()}")
+                if temp_file.exists():
+                    temp_file.unlink()
+            except OSError:
+                pass

     def _get_cached_table_metadata(self, table_identifier, ttl_seconds=None):
         """Load cached table metadata from disk based on table identifier."""
         if ttl_seconds is None:
             ttl_seconds = ConfigLoader.get("dry_run", "cache_ttl_seconds", fallback=900)

-        cache_dir = os.path.join(tempfile.gettempdir(), "bigquery_etl_dryrun_cache")
-        os.makedirs(cache_dir, exist_ok=True)
+        cache_dir = Path(tempfile.gettempdir()) / "bigquery_etl_dryrun_cache"
+        cache_dir.mkdir(parents=True, exist_ok=True)
         # table identifier as cache key
         table_cache_key = hashlib.sha256(table_identifier.encode()).hexdigest()
-        cache_file = os.path.join(cache_dir, f"table_metadata_{table_cache_key}.pkl")
+        cache_file = cache_dir / f"table_metadata_{table_cache_key}.pkl"
+
+        try:
+            if not cache_file.exists():
+                return None

-        if os.path.exists(cache_file):
             # check if cache is expired
-            file_age = time.time() - os.path.getmtime(cache_file)
+            file_age = time.time() - cache_file.stat().st_mtime

             if file_age > ttl_seconds:
                 try:
-                    os.remove(cache_file)
+                    cache_file.unlink()
                 except OSError:
                     pass
                 return None

+            cached_data = pickle.loads(cache_file.read_bytes())
+            return cached_data
+        except (pickle.PickleError, EOFError, OSError, FileNotFoundError) as e:
+            print(f"[TABLE METADATA] Failed to load cache for {table_identifier}: {e}")
             try:
-                with open(cache_file, "rb") as f:
-                    cached_data = pickle.load(f)
-                return cached_data
-            except (pickle.PickleError, EOFError, OSError):
-                return None
-        return None
+                if cache_file.exists():
+                    cache_file.unlink()
+            except OSError:
+                pass
+            return None

     def _save_cached_table_metadata(self, table_identifier, metadata):
-        """Save table metadata to disk cache."""
-        cache_dir = os.path.join(tempfile.gettempdir(), "bigquery_etl_dryrun_cache")
-        os.makedirs(cache_dir, exist_ok=True)
+        """Save table metadata to disk cache using atomic write."""
+        cache_dir = Path(tempfile.gettempdir()) / "bigquery_etl_dryrun_cache"
+        cache_dir.mkdir(parents=True, exist_ok=True)
         table_cache_key = hashlib.sha256(table_identifier.encode()).hexdigest()
-        cache_file = os.path.join(cache_dir, f"table_metadata_{table_cache_key}.pkl")
+        cache_file = cache_dir / f"table_metadata_{table_cache_key}.pkl"

         try:
-            with open(cache_file, "wb") as f:
+            # write to temporary file first, then atomically rename
+            temp_file = Path(str(cache_file) + f".tmp.{os.getpid()}")
+            with open(temp_file, "wb") as f:
                 pickle.dump(metadata, f)
+                f.flush()
+                os.fsync(f.fileno())
+
+            temp_file.replace(cache_file)
         except (pickle.PickleError, OSError) as e:
             print(f"[TABLE METADATA] Failed to save cache for {table_identifier}: {e}")
+            try:
+                temp_file = Path(str(cache_file) + f".tmp.{os.getpid()}")
+                if temp_file.exists():
+                    temp_file.unlink()
+            except OSError:
+                pass

     @cached_property
     def dry_run_result(self):
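The table-metadata cache in the hunk above keys each file by a SHA-256 digest of the table identifier, so the same table always maps to the same cache file across processes. A small sketch of the path derivation (the helper function is hypothetical; only the digest and file-naming scheme mirror the diff):

    import hashlib
    import tempfile
    from pathlib import Path

    def table_metadata_cache_path(table_identifier: str) -> Path:
        # hypothetical helper: same key scheme as _get/_save_cached_table_metadata
        key = hashlib.sha256(table_identifier.encode()).hexdigest()
        cache_dir = Path(tempfile.gettempdir()) / "bigquery_etl_dryrun_cache"
        return cache_dir / f"table_metadata_{key}.pkl"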
@@ -343,7 +390,7 @@ def dry_run_result(self):
         else:
             sql = self.get_sql()

-        # Check cache first (if caching is enabled)
+        # check cache first (if caching is enabled)
         if sql is not None and self.use_cache:
             cache_key = self._get_cache_key(sql)
             cached_result = self._get_cached_result(cache_key)
@@ -470,8 +517,9 @@ def dry_run_result(self):

         self.dry_run_duration = time.time() - start_time

-        # Save to cache (if caching is enabled)
-        if self.use_cache:
+        # Save to cache (if caching is enabled and result is valid)
+        # Don't cache errors to allow retries
+        if self.use_cache and result.get("valid"):
             self._save_cached_result(cache_key, result)

         return result
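The last hunk only persists results whose valid flag is set, so a failed dry run is re-executed on the next call instead of replaying a cached error. Illustrative only; the save callback stands in for _save_cached_result:

    def maybe_cache(result: dict, save) -> None:
        # cache only successful dry runs; errors stay uncached so they can be retried
        if result.get("valid"):
            save(result)

    maybe_cache({"valid": True, "schema": {"fields": []}}, save=print)       # cached
    maybe_cache({"valid": False, "errors": ["dry run failed"]}, save=print)  # skipped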