Skip to content

Commit da1aa8c

Browse files
replay: adding counters for replay-blocking conditions
1 parent 8922713 commit da1aa8c

File tree

5 files changed

+68
-8
lines changed

5 files changed

+68
-8
lines changed

book/api/metrics-generated.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,10 @@
528528
| <span class="metrics-name">replay_&#8203;live_&#8203;banks</span> | gauge | The number of banks we currently have alive |
529529
| <span class="metrics-name">replay_&#8203;slots_&#8203;total</span> | counter | Count of slots replayed successfully |
530530
| <span class="metrics-name">replay_&#8203;transactions_&#8203;total</span> | counter | Count of transactions processed overall on the current fork |
531+
| <span class="metrics-name">replay_&#8203;sched_&#8203;full</span> | counter | Times where sched is full and a FEC set can't be processed |
532+
| <span class="metrics-name">replay_&#8203;reasm_&#8203;empty</span> | counter | Times where reasm is empty and a FEC set can't be processed |
533+
| <span class="metrics-name">replay_&#8203;leader_&#8203;bid_&#8203;wait</span> | counter | Times where replay is blocked by the PoH tile not sending an end of leader message |
534+
| <span class="metrics-name">replay_&#8203;banks_&#8203;full</span> | counter | Times where banks are full and a FEC set can't be processed |
531535
| <span class="metrics-name">replay_&#8203;progcache_&#8203;rooted</span> | counter | Number of program cache entries rooted |
532536
| <span class="metrics-name">replay_&#8203;progcache_&#8203;gc_&#8203;root</span> | counter | Number of program cache entries garbage collected while rooting |
533537
| <span class="metrics-name">replay_&#8203;accdb_&#8203;rooted</span> | counter | Number of account database entries rooted |

src/disco/metrics/generated/fd_metrics_replay.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ const fd_metrics_meta_t FD_METRICS_REPLAY[FD_METRICS_REPLAY_TOTAL] = {
1717
DECLARE_METRIC( REPLAY_LIVE_BANKS, GAUGE ),
1818
DECLARE_METRIC( REPLAY_SLOTS_TOTAL, COUNTER ),
1919
DECLARE_METRIC( REPLAY_TRANSACTIONS_TOTAL, COUNTER ),
20+
DECLARE_METRIC( REPLAY_SCHED_FULL, COUNTER ),
21+
DECLARE_METRIC( REPLAY_REASM_EMPTY, COUNTER ),
22+
DECLARE_METRIC( REPLAY_LEADER_BID_WAIT, COUNTER ),
23+
DECLARE_METRIC( REPLAY_BANKS_FULL, COUNTER ),
2024
DECLARE_METRIC( REPLAY_PROGCACHE_ROOTED, COUNTER ),
2125
DECLARE_METRIC( REPLAY_PROGCACHE_GC_ROOT, COUNTER ),
2226
DECLARE_METRIC( REPLAY_ACCDB_ROOTED, COUNTER ),

src/disco/metrics/generated/fd_metrics_replay.h

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,31 +108,55 @@
108108
#define FD_METRICS_COUNTER_REPLAY_TRANSACTIONS_TOTAL_DESC "Count of transactions processed overall on the current fork"
109109
#define FD_METRICS_COUNTER_REPLAY_TRANSACTIONS_TOTAL_CVT (FD_METRICS_CONVERTER_NONE)
110110

111-
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_OFF (127UL)
111+
#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_OFF (127UL)
112+
#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_NAME "replay_sched_full"
113+
#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_TYPE (FD_METRICS_TYPE_COUNTER)
114+
#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_DESC "Times where sched is full and a FEC set can't be processed"
115+
#define FD_METRICS_COUNTER_REPLAY_SCHED_FULL_CVT (FD_METRICS_CONVERTER_NONE)
116+
117+
#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_OFF (128UL)
118+
#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_NAME "replay_reasm_empty"
119+
#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_TYPE (FD_METRICS_TYPE_COUNTER)
120+
#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_DESC "Times where reasm is empty and a FEC set can't be processed"
121+
#define FD_METRICS_COUNTER_REPLAY_REASM_EMPTY_CVT (FD_METRICS_CONVERTER_NONE)
122+
123+
#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_OFF (129UL)
124+
#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_NAME "replay_leader_bid_wait"
125+
#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_TYPE (FD_METRICS_TYPE_COUNTER)
126+
#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_DESC "Times where replay is blocked by the PoH tile not sending an end of leader message"
127+
#define FD_METRICS_COUNTER_REPLAY_LEADER_BID_WAIT_CVT (FD_METRICS_CONVERTER_NONE)
128+
129+
#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_OFF (130UL)
130+
#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_NAME "replay_banks_full"
131+
#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_TYPE (FD_METRICS_TYPE_COUNTER)
132+
#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_DESC "Times where banks are full and a FEC set can't be processed"
133+
#define FD_METRICS_COUNTER_REPLAY_BANKS_FULL_CVT (FD_METRICS_CONVERTER_NONE)
134+
135+
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_OFF (131UL)
112136
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_NAME "replay_progcache_rooted"
113137
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_TYPE (FD_METRICS_TYPE_COUNTER)
114138
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_DESC "Number of program cache entries rooted"
115139
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_ROOTED_CVT (FD_METRICS_CONVERTER_NONE)
116140

117-
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_OFF (128UL)
141+
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_OFF (132UL)
118142
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_NAME "replay_progcache_gc_root"
119143
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_TYPE (FD_METRICS_TYPE_COUNTER)
120144
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_DESC "Number of program cache entries garbage collected while rooting"
121145
#define FD_METRICS_COUNTER_REPLAY_PROGCACHE_GC_ROOT_CVT (FD_METRICS_CONVERTER_NONE)
122146

123-
#define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_OFF (129UL)
147+
#define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_OFF (133UL)
124148
#define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_NAME "replay_accdb_rooted"
125149
#define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_TYPE (FD_METRICS_TYPE_COUNTER)
126150
#define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_DESC "Number of account database entries rooted"
127151
#define FD_METRICS_COUNTER_REPLAY_ACCDB_ROOTED_CVT (FD_METRICS_CONVERTER_NONE)
128152

129-
#define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_OFF (130UL)
153+
#define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_OFF (134UL)
130154
#define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_NAME "replay_accdb_gc_root"
131155
#define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_TYPE (FD_METRICS_TYPE_COUNTER)
132156
#define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_DESC "Number of account database entries garbage collected"
133157
#define FD_METRICS_COUNTER_REPLAY_ACCDB_GC_ROOT_CVT (FD_METRICS_CONVERTER_NONE)
134158

135-
#define FD_METRICS_REPLAY_TOTAL (19UL)
159+
#define FD_METRICS_REPLAY_TOTAL (23UL)
136160
extern const fd_metrics_meta_t FD_METRICS_REPLAY[FD_METRICS_REPLAY_TOTAL];
137161

138162
#endif /* HEADER_fd_src_disco_metrics_generated_fd_metrics_replay_h */

src/disco/metrics/metrics.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,11 @@ metric introduced.
797797
<counter name="SlotsTotal" summary="Count of slots replayed successfully" />
798798
<counter name="TransactionsTotal" summary="Count of transactions processed overall on the current fork" />
799799

800+
<counter name="SchedFull" summary="Times where sched is full and a FEC set can't be processed" />
801+
<counter name="ReasmEmpty" summary="Times where reasm is empty and a FEC set can't be processed" />
802+
<counter name="LeaderBidWait" summary="Times where replay is blocked by the PoH tile not sending an end of leader message" />
803+
<counter name="BanksFull" summary="Times where banks are full and a FEC set can't be processed" />
804+
800805
<counter name="ProgcacheRooted" summary="Number of program cache entries rooted" />
801806
<counter name="ProgcacheGcRoot" summary="Number of program cache entries garbage collected while rooting" />
802807

src/discof/replay/fd_replay_tile.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,11 @@ struct fd_replay_tile {
406406

407407
ulong slots_total;
408408
ulong transactions_total;
409+
410+
ulong sched_full;
411+
ulong reasm_empty;
412+
ulong leader_bid_wait;
413+
ulong banks_full;
409414
} metrics;
410415

411416
uchar __attribute__((aligned(FD_MULTI_EPOCH_LEADERS_ALIGN))) mleaders_mem[ FD_MULTI_EPOCH_LEADERS_FOOTPRINT ];
@@ -473,6 +478,11 @@ metrics_write( fd_replay_tile_t * ctx ) {
473478
FD_MCNT_SET( REPLAY, SLOTS_TOTAL, ctx->metrics.slots_total );
474479
FD_MCNT_SET( REPLAY, TRANSACTIONS_TOTAL, ctx->metrics.transactions_total );
475480

481+
FD_MCNT_SET( REPLAY, SCHED_FULL, ctx->metrics.sched_full );
482+
FD_MCNT_SET( REPLAY, REASM_EMPTY, ctx->metrics.reasm_empty );
483+
FD_MCNT_SET( REPLAY, LEADER_BID_WAIT, ctx->metrics.leader_bid_wait );
484+
FD_MCNT_SET( REPLAY, BANKS_FULL, ctx->metrics.banks_full );
485+
476486
FD_MCNT_SET( REPLAY, PROGCACHE_ROOTED, ctx->progcache_admin->metrics.root_cnt );
477487
FD_MCNT_SET( REPLAY, PROGCACHE_GC_ROOT, ctx->progcache_admin->metrics.gc_root_cnt );
478488

@@ -1600,8 +1610,15 @@ replay( fd_replay_tile_t * ctx,
16001610
static int
16011611
can_process_fec( fd_replay_tile_t * ctx ) {
16021612
fd_reasm_fec_t * fec;
1603-
if( FD_UNLIKELY( !fd_sched_can_ingest( ctx->sched, 1UL ) ) ) return 0;
1604-
if( FD_UNLIKELY( (fec = fd_reasm_peek( ctx->reasm ))==NULL ) ) return 0;
1613+
if( FD_UNLIKELY( !fd_sched_can_ingest( ctx->sched, 1UL ) ) ) {
1614+
ctx->metrics.sched_full++;
1615+
return 0;
1616+
}
1617+
1618+
if( FD_UNLIKELY( (fec = fd_reasm_peek( ctx->reasm ))==NULL ) ) {
1619+
ctx->metrics.reasm_empty++;
1620+
return 0;
1621+
}
16051622

16061623
if( FD_UNLIKELY( ctx->is_leader && fec->fec_set_idx==0U && fd_reasm_parent( ctx->reasm, fec )->bank_idx==ctx->leader_bank->idx ) ) {
16071624
/* There's a race that's exceedingly rare, where we receive the
@@ -1618,12 +1635,16 @@ can_process_fec( fd_replay_tile_t * ctx ) {
16181635
ordering invariants in banks and sched. */
16191636
FD_TEST( ctx->recv_block_id );
16201637
FD_TEST( !ctx->recv_poh );
1638+
ctx->metrics.leader_bid_wait++;
16211639
return 0;
16221640
}
16231641

16241642
/* If fec_set_idx is 0, we need a new bank for a new slot. Banks must
16251643
not be full in this case. */
1626-
if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) && fec->fec_set_idx==0 ) ) return 0;
1644+
if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) && fec->fec_set_idx==0 ) ) {
1645+
ctx->metrics.banks_full++;
1646+
return 0;
1647+
}
16271648

16281649
/* Otherwise, banks may not be full, so we can always create a new
16291650
bank if needed. Or, if banks are full, the current fec set's
@@ -1897,6 +1918,8 @@ after_credit( fd_replay_tile_t * ctx,
18971918
*charge_busy = 1;
18981919
*opt_poll_in = 0;
18991920
return;
1921+
} else {
1922+
19001923
}
19011924

19021925
*charge_busy = replay( ctx, stem );

0 commit comments

Comments
 (0)