Skip to content

Commit b3e7c30

Browse files
authored
Merge pull request #13 from microsoft/fix/metrics-resilience-detections-dir
fix: make metrics upload resilient; ensure detections.csv dir exists
2 parents fbd3ed9 + 72990ea commit b3e7c30

File tree

2 files changed

+36
-11
lines changed

2 files changed

+36
-11
lines changed

sparrow/inference.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -264,6 +264,7 @@ def preprocess_classification(img):
264264

265265
# CSV for logging detections
266266
csv_file = '/app/static/data/detections.csv'
267+
os.makedirs(os.path.dirname(csv_file), exist_ok=True)
267268

268269
def write_to_csv(image_name, detection, confidence, date):
269270
"""Append detection results to CSV."""

sparrow/rest_client.py

Lines changed: 35 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,7 @@ def process_and_upload_audio():
441441
def gather_system_metrics():
442442
"""
443443
Gathers system metrics including CPU, memory, disk, temperature/humidity, solar, and pressure.
444+
Always returns a dict; sensor failures become None values.
444445
"""
445446
metrics = {
446447
"auth_key": auth_key,
@@ -453,15 +454,30 @@ def gather_system_metrics():
453454
"network_received": psutil.net_io_counters().bytes_recv,
454455
"uptime_seconds": int(time.time() - psutil.boot_time()),
455456
}
457+
458+
try:
459+
env = read_env(bus, SENSOR_STATE)
460+
metrics["temperature_celsius"] = env.get("t_c")
461+
metrics["humidity_percent"] = env.get("rh_pct")
462+
metrics["bme688_pressure_pa"] = env.get("p_pa")
463+
metrics["bme688_temperature_celsius"] = env.get("t_bme_c")
464+
metrics["bme688_humidity_percent"] = env.get("rh_bme_pct")
465+
except Exception as e:
466+
logger.error(f"Sensor read failed inside gather_system_metrics(): {e}")
467+
metrics.update({
468+
"temperature_celsius": None,
469+
"humidity_percent": None,
470+
"bme688_pressure_pa": None,
471+
"bme688_temperature_celsius": None,
472+
"bme688_humidity_percent": None,
473+
})
456474

457-
env = read_env(bus, SENSOR_STATE)
458-
metrics["temperature_celsius"] = env["t_c"]
459-
metrics["humidity_percent"] = env["rh_pct"]
460-
metrics["bme688_pressure_pa"] = env["p_pa"]
461-
metrics["bme688_temperature_celsius"] = env["t_bme_c"]
462-
metrics["bme688_humidity_percent"] = env["rh_bme_pct"]
475+
try:
476+
ppv, yields, ved_v, ved_load_p = read_solar_generation()
477+
except Exception as e:
478+
logger.error(f"VE.Direct read failed: {e}")
479+
ppv, yields, ved_v, ved_load_p = None, {}, None, None
463480

464-
ppv, yields, ved_v, ved_load_p = read_solar_generation()
465481
metrics["solar_generation_watts"] = round(ppv, 2) if ppv is not None else None
466482
metrics["yield_today_wh"] = yields.get(0) if isinstance(yields, dict) else None
467483
metrics["yield_yesterday_wh"] = yields.get(1) if isinstance(yields, dict) else None
@@ -473,16 +489,24 @@ def gather_system_metrics():
473489

474490
def send_system_metrics():
475491
"""Send the latest system metrics to the server, appending to backlog on failure."""
476-
send_backlog_metrics()
477-
metrics = gather_system_metrics()
478-
logger.info(f"Sending system metrics to {system_metrics_url}")
492+
logger.info("Metrics job: starting send_system_metrics()")
479493
try:
494+
send_backlog_metrics() # this already checks connectivity for backlog replay
495+
except Exception as e:
496+
logger.error(f"Backlog replay failed: {e}")
497+
498+
try:
499+
metrics = gather_system_metrics()
500+
logger.info(f"Sending system metrics to {system_metrics_url}")
480501
response = requests.post(system_metrics_url, json=metrics, timeout=10)
481502
response.raise_for_status()
482503
logger.info(f"Successfully sent system metrics: {response.status_code}")
483504
except requests.exceptions.RequestException as e:
484-
logger.error(f"Failed to send system metrics: {e}")
505+
logger.error(f"Failed to POST system metrics: {e}")
485506
append_metric_to_backlog(metrics)
507+
except Exception as e:
508+
logger.critical(f"send_system_metrics() unexpected error before POST: {e}", exc_info=True)
509+
486510

487511
# Scheduling
488512
executor = ThreadPoolExecutor(max_workers=5)

0 commit comments

Comments
 (0)