diff --git a/book/api/metrics-generated.md b/book/api/metrics-generated.md index e8f85684e8..2b6909a45e 100644 --- a/book/api/metrics-generated.md +++ b/book/api/metrics-generated.md @@ -526,8 +526,15 @@ | replay_​reset_​slot | gauge | The slot at which we last reset the replay stage, or 0 if unknown | | replay_​max_​live_​banks | gauge | The maximum number of banks we can have alive | | replay_​live_​banks | gauge | The number of banks we currently have alive | +| replay_​reasm_​free | gauge | The number of free FEC sets in the reassembly queue | +| replay_​reasm_​latest_​slot | gauge | Slot of the latest FEC set in the reassembly queue that can be replayed | +| replay_​reasm_​latest_​fec_​idx | gauge | FEC set index of the latest FEC set in the reassembly queue that can be replayed | | replay_​slots_​total | counter | Count of slots replayed successfully | | replay_​transactions_​total | counter | Count of transactions processed overall on the current fork | +| replay_​sched_​full | counter | Times where sched is full and a FEC set can't be processed | +| replay_​reasm_​empty | counter | Times where reasm is empty and a FEC set can't be processed | +| replay_​leader_​bid_​wait | counter | Times where replay is blocked by the PoH tile not sending an end of leader message | +| replay_​banks_​full | counter | Times where banks are full and a FEC set can't be processed | | replay_​progcache_​rooted | counter | Number of program cache entries rooted | | replay_​progcache_​gc_​root | counter | Number of program cache entries garbage collected while rooting | | replay_​accdb_​rooted | counter | Number of account database entries rooted | diff --git a/src/disco/metrics/generated/fd_metrics_replay.c b/src/disco/metrics/generated/fd_metrics_replay.c index fbb5746603..cde7fa0390 100644 --- a/src/disco/metrics/generated/fd_metrics_replay.c +++ b/src/disco/metrics/generated/fd_metrics_replay.c @@ -15,8 +15,15 @@ const fd_metrics_meta_t 
FD_METRICS_REPLAY[FD_METRICS_REPLAY_TOTAL] = { DECLARE_METRIC( REPLAY_RESET_SLOT, GAUGE ), DECLARE_METRIC( REPLAY_MAX_LIVE_BANKS, GAUGE ), DECLARE_METRIC( REPLAY_LIVE_BANKS, GAUGE ), + DECLARE_METRIC( REPLAY_REASM_FREE, GAUGE ), + DECLARE_METRIC( REPLAY_REASM_LATEST_SLOT, GAUGE ), + DECLARE_METRIC( REPLAY_REASM_LATEST_FEC_IDX, GAUGE ), DECLARE_METRIC( REPLAY_SLOTS_TOTAL, COUNTER ), DECLARE_METRIC( REPLAY_TRANSACTIONS_TOTAL, COUNTER ), + DECLARE_METRIC( REPLAY_SCHED_FULL, COUNTER ), + DECLARE_METRIC( REPLAY_REASM_EMPTY, COUNTER ), + DECLARE_METRIC( REPLAY_LEADER_BID_WAIT, COUNTER ), + DECLARE_METRIC( REPLAY_BANKS_FULL, COUNTER ), DECLARE_METRIC( REPLAY_PROGCACHE_ROOTED, COUNTER ), DECLARE_METRIC( REPLAY_PROGCACHE_GC_ROOT, COUNTER ), DECLARE_METRIC( REPLAY_ACCDB_ROOTED, COUNTER ), diff --git a/src/disco/metrics/generated/fd_metrics_replay.h b/src/disco/metrics/generated/fd_metrics_replay.h index 26fbd5629d..a16788a1a1 100644 --- a/src/disco/metrics/generated/fd_metrics_replay.h +++ b/src/disco/metrics/generated/fd_metrics_replay.h @@ -96,43 +96,85 @@ #define FD_METRICS_GAUGE_REPLAY_LIVE_BANKS_DESC "The number of banks we currently have alive" #define FD_METRICS_GAUGE_REPLAY_LIVE_BANKS_CVT (FD_METRICS_CONVERTER_NONE) -#define FD_METRICS_COUNTER_REPLAY_SLOTS_TOTAL_OFF (125UL) +#define FD_METRICS_GAUGE_REPLAY_REASM_FREE_OFF (125UL) +#define FD_METRICS_GAUGE_REPLAY_REASM_FREE_NAME "replay_reasm_free" +#define FD_METRICS_GAUGE_REPLAY_REASM_FREE_TYPE (FD_METRICS_TYPE_GAUGE) +#define FD_METRICS_GAUGE_REPLAY_REASM_FREE_DESC "The number of free FEC sets in the reassembly queue" +#define FD_METRICS_GAUGE_REPLAY_REASM_FREE_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_SLOT_OFF (126UL) +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_SLOT_NAME "replay_reasm_latest_slot" +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_SLOT_TYPE (FD_METRICS_TYPE_GAUGE) +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_SLOT_DESC "Slot of the latest FEC set in the reassembly 
queue that can be replayed" +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_SLOT_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_FEC_IDX_OFF (127UL) +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_FEC_IDX_NAME "replay_reasm_latest_fec_idx" +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_FEC_IDX_TYPE (FD_METRICS_TYPE_GAUGE) +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_FEC_IDX_DESC "FEC set index of the latest FEC set in the reassembly queue that can be replayed" +#define FD_METRICS_GAUGE_REPLAY_REASM_LATEST_FEC_IDX_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_COUNTER_REPLAY_SLOTS_TOTAL_OFF (128UL) #define FD_METRICS_COUNTER_REPLAY_SLOTS_TOTAL_NAME "replay_slots_total" #define FD_METRICS_COUNTER_REPLAY_SLOTS_TOTAL_TYPE (FD_METRICS_TYPE_COUNTER) #define FD_METRICS_COUNTER_REPLAY_SLOTS_TOTAL_DESC "Count of slots replayed successfully" #define FD_METRICS_COUNTER_REPLAY_SLOTS_TOTAL_CVT (FD_METRICS_CONVERTER_NONE) -#define FD_METRICS_COUNTER_REPLAY_TRANSACTIONS_TOTAL_OFF (126UL) +#define FD_METRICS_COUNTER_REPLAY_TRANSACTIONS_TOTAL_OFF (129UL) #define FD_METRICS_COUNTER_REPLAY_TRANSACTIONS_TOTAL_NAME "replay_transactions_total" #define FD_METRICS_COUNTER_REPLAY_TRANSACTIONS_TOTAL_TYPE (FD_METRICS_TYPE_COUNTER) #define FD_METRICS_COUNTER_REPLAY_TRANSACTIONS_TOTAL_DESC "Count of transactions processed overall on the current fork" #define FD_METRICS_COUNTER_REPLAY_TRANSACTIONS_TOTAL_CVT (FD_METRICS_CONVERTER_NONE) -#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_OFF (127UL) +#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_OFF (130UL) +#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_NAME "replay_sched_full" +#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_TYPE (FD_METRICS_TYPE_COUNTER) +#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_DESC "Times where sched is full and a FEC set can't be processed" +#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_OFF (131UL) +#define 
FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_NAME "replay_reasm_empty" +#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_TYPE (FD_METRICS_TYPE_COUNTER) +#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_DESC "Times where reasm is empty and a FEC set can't be processed" +#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_OFF (132UL) +#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_NAME "replay_leader_bid_wait" +#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_TYPE (FD_METRICS_TYPE_COUNTER) +#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_DESC "Times where replay is blocked by the the PoH tile not sending an end of leader message" +#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_OFF (133UL) +#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_NAME "replay_banks_full" +#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_TYPE (FD_METRICS_TYPE_COUNTER) +#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_DESC "Times where banks are full and a FEC set can't be processed" +#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_OFF (134UL) #define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_NAME "replay_progcache_rooted" #define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_TYPE (FD_METRICS_TYPE_COUNTER) #define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_DESC "Number of program cache entries rooted" #define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_CVT (FD_METRICS_CONVERTER_NONE) -#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_OFF (128UL) +#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_OFF (135UL) #define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_NAME "replay_progcache_gc_root" #define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_TYPE (FD_METRICS_TYPE_COUNTER) #define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_DESC "Number of program cache entries garbage 
collected while rooting" #define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_CVT (FD_METRICS_CONVERTER_NONE) -#define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_OFF (129UL) +#define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_OFF (136UL) #define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_NAME "replay_accdb_rooted" #define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_TYPE (FD_METRICS_TYPE_COUNTER) #define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_DESC "Number of account database entries rooted" #define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_CVT (FD_METRICS_CONVERTER_NONE) -#define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_OFF (130UL) +#define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_OFF (137UL) #define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_NAME "replay_accdb_gc_root" #define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_TYPE (FD_METRICS_TYPE_COUNTER) #define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_DESC "Number of account database entries garbage collected" #define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_CVT (FD_METRICS_CONVERTER_NONE) -#define FD_METRICS_REPLAY_TOTAL (19UL) +#define FD_METRICS_REPLAY_TOTAL (26UL) extern const fd_metrics_meta_t FD_METRICS_REPLAY[FD_METRICS_REPLAY_TOTAL]; #endif /* HEADER_fd_src_disco_metrics_generated_fd_metrics_replay_h */ diff --git a/src/disco/metrics/metrics.xml b/src/disco/metrics/metrics.xml index 9724be96b7..d3b1c09e4f 100644 --- a/src/disco/metrics/metrics.xml +++ b/src/disco/metrics/metrics.xml @@ -794,9 +794,18 @@ metric introduced. 
+ + + + + + + + + diff --git a/src/discof/replay/fd_replay_tile.c b/src/discof/replay/fd_replay_tile.c index 093e2d8d98..263ee7cabd 100644 --- a/src/discof/replay/fd_replay_tile.c +++ b/src/discof/replay/fd_replay_tile.c @@ -406,6 +406,14 @@ struct fd_replay_tile { ulong slots_total; ulong transactions_total; + + ulong reasm_latest_slot; + ulong reasm_latest_fec_idx; + + ulong sched_full; + ulong reasm_empty; + ulong leader_bid_wait; + ulong banks_full; } metrics; uchar __attribute__((aligned(FD_MULTI_EPOCH_LEADERS_ALIGN))) mleaders_mem[ FD_MULTI_EPOCH_LEADERS_FOOTPRINT ]; @@ -470,9 +478,20 @@ metrics_write( fd_replay_tile_t * ctx ) { ulong live_banks = fd_banks_pool_max( bank_pool ) - fd_banks_pool_free( bank_pool ); FD_MGAUGE_SET( REPLAY, LIVE_BANKS, live_banks ); + ulong reasm_free = fd_reasm_free( ctx->reasm ); + FD_MGAUGE_SET( REPLAY, REASM_FREE, reasm_free ); + FD_MCNT_SET( REPLAY, SLOTS_TOTAL, ctx->metrics.slots_total ); FD_MCNT_SET( REPLAY, TRANSACTIONS_TOTAL, ctx->metrics.transactions_total ); + FD_MGAUGE_SET( REPLAY, REASM_LATEST_SLOT, ctx->metrics.reasm_latest_slot ); + FD_MGAUGE_SET( REPLAY, REASM_LATEST_FEC_IDX, ctx->metrics.reasm_latest_fec_idx ); + + FD_MCNT_SET( REPLAY, SCHED_FULL, ctx->metrics.sched_full ); + FD_MCNT_SET( REPLAY, REASM_EMPTY, ctx->metrics.reasm_empty ); + FD_MCNT_SET( REPLAY, LEADER_BID_WAIT, ctx->metrics.leader_bid_wait ); + FD_MCNT_SET( REPLAY, BANKS_FULL, ctx->metrics.banks_full ); + FD_MCNT_SET( REPLAY, PROGCACHE_ROOTED, ctx->progcache_admin->metrics.root_cnt ); FD_MCNT_SET( REPLAY, PROGCACHE_GC_ROOT, ctx->progcache_admin->metrics.gc_root_cnt ); @@ -1600,8 +1619,18 @@ replay( fd_replay_tile_t * ctx, static int can_process_fec( fd_replay_tile_t * ctx ) { fd_reasm_fec_t * fec; - if( FD_UNLIKELY( !fd_sched_can_ingest( ctx->sched, 1UL ) ) ) return 0; - if( FD_UNLIKELY( (fec = fd_reasm_peek( ctx->reasm ))==NULL ) ) return 0; + if( FD_UNLIKELY( !fd_sched_can_ingest( ctx->sched, 1UL ) ) ) { + ctx->metrics.sched_full++; + return 0; + } 
+ + if( FD_UNLIKELY( (fec = fd_reasm_peek( ctx->reasm ))==NULL ) ) { + ctx->metrics.reasm_empty++; + return 0; + } + + ctx->metrics.reasm_latest_slot = fec->slot; + ctx->metrics.reasm_latest_fec_idx = fec->fec_set_idx; if( FD_UNLIKELY( ctx->is_leader && fec->fec_set_idx==0U && fd_reasm_parent( ctx->reasm, fec )->bank_idx==ctx->leader_bank->idx ) ) { /* There's a race that's exceedingly rare, where we receive the @@ -1618,12 +1647,16 @@ can_process_fec( fd_replay_tile_t * ctx ) { ordering invariants in banks and sched. */ FD_TEST( ctx->recv_block_id ); FD_TEST( !ctx->recv_poh ); + ctx->metrics.leader_bid_wait++; return 0; } /* If fec_set_idx is 0, we need a new bank for a new slot. Banks must not be full in this case. */ - if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) && fec->fec_set_idx==0 ) ) return 0; + if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) && fec->fec_set_idx==0 ) ) { + ctx->metrics.banks_full++; + return 0; + } /* Otherwise, banks may not be full, so we can always create a new bank if needed. Or, if banks are full, the current fec set's