From f376c4fd0148ac25f03fec99b2820118fb18a731 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 30 Sep 2025 10:20:23 -0700 Subject: [PATCH 1/2] Implement pool of empty pages in central with some basic operations --- Makefile.in | 3 +- include/jemalloc/internal/arena_externs.h | 11 +- include/jemalloc/internal/ctl.h | 2 + include/jemalloc/internal/hpa.h | 14 + include/jemalloc/internal/hpa_central.h | 47 +++ include/jemalloc/internal/hpa_opts.h | 10 +- .../internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/mutex_prof.h | 3 +- include/jemalloc/internal/witness.h | 1 + src/arena.c | 36 ++ src/ctl.c | 57 ++- src/hpa.c | 93 ++++- src/hpa_central.c | 183 ++++++++++ src/jemalloc.c | 7 + src/stats.c | 36 +- test/unit/hpa.c | 12 +- test/unit/hpa_central_pool.c | 329 ++++++++++++++++++ test/unit/hpa_vectorized_madvise.c | 4 +- .../unit/hpa_vectorized_madvise_large_batch.c | 4 +- test/unit/mallctl.c | 4 + 20 files changed, 837 insertions(+), 20 deletions(-) create mode 100644 test/unit/hpa_central_pool.c diff --git a/Makefile.in b/Makefile.in index 4b5b6507e4..ca1bfaa629 100644 --- a/Makefile.in +++ b/Makefile.in @@ -232,9 +232,10 @@ TESTS_UNIT := \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_sec_integration.c \ $(srcroot)test/unit/hpa_thp_always.c \ + $(srcroot)test/unit/hpa_background_thread.c \ + $(srcroot)test/unit/hpa_central_pool.c \ $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ - $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 1d00463527..da55e646c5 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -46,8 +46,12 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, 
ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, - hpa_shard_stats_t *hpastats); -void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); + hpa_shard_stats_t *hpastats, sec_stats_t *secstats); +void arena_stats_global_central_read(tsdn_t *tsdn, hpa_central_stats_t *stats); +void arena_stats_global_central_mutex_read( + tsdn_t *tsdn, mutex_prof_data_t *mutex_prof_data); + +void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); edata_t *arena_extent_alloc_large( tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); void arena_extent_dalloc_large_prep( @@ -125,7 +129,10 @@ void arena_prefork5(tsdn_t *tsdn, arena_t *arena); void arena_prefork6(tsdn_t *tsdn, arena_t *arena); void arena_prefork7(tsdn_t *tsdn, arena_t *arena); void arena_prefork8(tsdn_t *tsdn, arena_t *arena); +void arena_global_prefork(tsdn_t *tsdn, bool use_hpa); void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_global_postfork_parent(tsdn_t *tsdn, bool use_hpa); void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); +void arena_global_postfork_child(tsdn_t *tsdn, bool use_hpa); #endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 82035fe366..e39785b0a2 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/background_thread_structs.h" #include "jemalloc/internal/bin_stats.h" +#include "jemalloc/internal/hpa_central.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex_prof.h" @@ -65,6 +66,7 @@ typedef struct ctl_stats_s { size_t retained; background_thread_stats_t background_thread; + hpa_central_stats_t hpa_central; mutex_prof_data_t 
mutex_prof_data[mutex_prof_num_global_mutexes]; } ctl_stats_t; diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index dc7725b77a..a7864ab22e 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -51,6 +51,20 @@ struct hpa_shard_nonderived_stats_s { * Guarded by mtx. */ uint64_t ndehugifies; + + /* + * The number of times we donated pageslab to central pool + * + * Guarded by mtx. + */ + uint64_t ndonated_ps; + + /* + * The number of times we borrowed pageslab from a central pool + * + * Guarded by mtx. + */ + uint64_t nborrowed_ps; }; /* Completely derived; only used by CTL. */ diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h index 3e0ff7daeb..947c5463ed 100644 --- a/include/jemalloc/internal/hpa_central.h +++ b/include/jemalloc/internal/hpa_central.h @@ -8,8 +8,35 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd_types.h" +typedef struct hpa_pool_s hpa_pool_t; +struct hpa_pool_s { + /* + * Pool of empty huge pages to be shared between shards that are + * participating. + * + * Page is owned by the pool if it lives in one of these two lists. + * This means that it should not be part of any hpa_shard's psset at the + * same time. + */ + hpdata_empty_list_t nonpurged; + hpdata_empty_list_t purged; +}; + +typedef struct hpa_central_stats_s hpa_central_stats_t; +struct hpa_central_stats_s { + /* Number of pages purged while they were in the central pool */ + uint64_t npurged_pool; + + /* Total number of dirty base pages in the pool */ + size_t ndirty_pool; +}; + typedef struct hpa_central_s hpa_central_t; struct hpa_central_s { + /* Guards the access to central pool of empty hugepages */ + malloc_mutex_t pool_mtx; + hpa_pool_t pool; + /* * Guards expansion of eden. We separate this from the regular mutex so * that cheaper operations can still continue while we're doing the OS @@ -30,6 +57,9 @@ struct hpa_central_s { /* The HPA hooks. 
*/ hpa_hooks_t hooks; + + /* Stats */ + hpa_central_stats_t stats; }; bool hpa_central_init( @@ -38,4 +68,21 @@ bool hpa_central_init( hpdata_t *hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, uint64_t age, bool hugify_eager, bool *oom); +/* Donate empty pages to central */ +void hpa_central_ps_insert(tsdn_t *tsdn, hpa_central_t *central, + hpdata_empty_list_t *pages, const nstime_t *now); +/* Get empty page from central without growing it */ +hpdata_t *hpa_central_ps_pop(tsdn_t *tsdn, hpa_central_t *central); + +/* Purge up to max_ps empty pages in the central */ +size_t hpa_central_purge( + tsdn_t *tsdn, hpa_central_t *central, const nstime_t *now, size_t max_ps); + +void hpa_central_prefork(tsdn_t *tsdn, hpa_central_t *central); +void hpa_central_postfork_parent(tsdn_t *tsdn, hpa_central_t *central); +void hpa_central_postfork_child(tsdn_t *tsdn, hpa_central_t *central); + +void hpa_central_stats_read( + tsdn_t *tsdn, hpa_central_t *central, hpa_central_stats_t *stats); + #endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */ diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 6747c2db8e..e5c32f2afc 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -152,6 +152,12 @@ struct hpa_shard_opts_s { * hpa_hugify_style_t for options). */ hpa_hugify_style_t hugify_style; + + /* + * If use_pool is true this shard will donate empty pages to the pool + * and borrow from the pool before using central allocator. 
+ */ + bool use_pool; }; /* clang-format off */ @@ -183,7 +189,9 @@ struct hpa_shard_opts_s { /* min_purge_delay_ms */ \ 0, \ /* hugify_style */ \ - hpa_hugify_style_lazy \ + hpa_hugify_style_lazy, \ + /* use_pool */ \ + false \ } /* clang-format on */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index ea739ea88c..e0d23352e0 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -17,6 +17,7 @@ extern bool opt_abort_conf; extern bool opt_trust_madvise; extern bool opt_experimental_hpa_start_huge_if_thp_always; extern bool opt_experimental_hpa_enforce_hugify; +extern uint64_t opt_hpa_pool_purge_delay_ms; extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 572200f353..b61d9d4aa9 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -36,7 +36,8 @@ typedef enum { OP(tcache_list) \ OP(hpa_shard) \ OP(hpa_shard_grow) \ - OP(hpa_sec) + OP(hpa_sec) \ + OP(hpa_central_pool) typedef enum { #define OP(mtx) arena_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 0a426ff567..3ca2e4ed42 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -56,6 +56,7 @@ enum witness_rank_e { WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS, WITNESS_RANK_HPA_CENTRAL_GROW, + WITNESS_RANK_HPA_CENTRAL_POOL, WITNESS_RANK_HPA_CENTRAL, WITNESS_RANK_EDATA_CACHE, diff --git a/src/arena.c b/src/arena.c index 5b144c63a0..261dbbd17c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -213,6 +213,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } } +void +arena_stats_global_central_read(tsdn_t *tsdn, hpa_central_stats_t *stats) { + hpa_central_stats_read(tsdn, 
&arena_pa_central_global.hpa, stats); +} + +void +arena_stats_global_central_mutex_read( + tsdn_t *tsdn, mutex_prof_data_t *mutex_prof_data) { + malloc_mutex_lock(tsdn, &arena_pa_central_global.hpa.pool_mtx); + malloc_mutex_prof_read( + tsdn, mutex_prof_data, &arena_pa_central_global.hpa.pool_mtx); + malloc_mutex_unlock(tsdn, &arena_pa_central_global.hpa.pool_mtx); +} + + static void arena_background_thread_inactivity_check( tsdn_t *tsdn, arena_t *arena, bool is_background_thread) { @@ -2321,6 +2336,13 @@ arena_prefork8(tsdn_t *tsdn, arena_t *arena) { } } +void +arena_global_prefork(tsdn_t *tsdn, bool use_hpa) { + if (use_hpa) { + hpa_central_prefork(tsdn, &arena_pa_central_global.hpa); + } +} + void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < nbins_total; i++) { @@ -2336,6 +2358,13 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } } +void +arena_global_postfork_parent(tsdn_t *tsdn, bool use_hpa) { + if (use_hpa) { + hpa_central_postfork_parent(tsdn, &arena_pa_central_global.hpa); + } +} + void arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); @@ -2374,3 +2403,10 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } } + +void +arena_global_postfork_child(tsdn_t *tsdn, bool use_hpa) { + if (use_hpa) { + hpa_central_postfork_child(tsdn, &arena_pa_central_global.hpa); + } +} diff --git a/src/ctl.c b/src/ctl.c index 1260e197da..0d191749ac 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -111,6 +111,8 @@ CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_purge_threshold) CTL_PROTO(opt_hpa_min_purge_delay_ms) CTL_PROTO(opt_hpa_hugify_style) +CTL_PROTO(opt_hpa_use_pool) +CTL_PROTO(opt_hpa_pool_purge_delay_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -273,6 +275,8 @@ CTL_PROTO(stats_arenas_i_hpa_shard_npurges) 
CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_nhugify_failures) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) +CTL_PROTO(stats_arenas_i_hpa_shard_ndonated_ps) +CTL_PROTO(stats_arenas_i_hpa_shard_nborrowed_ps) /* Set of stats for non-hugified and hugified slabs. */ CTL_PROTO(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge) @@ -349,6 +353,8 @@ CTL_PROTO(stats_active) CTL_PROTO(stats_background_thread_num_threads) CTL_PROTO(stats_background_thread_num_runs) CTL_PROTO(stats_background_thread_run_interval) +CTL_PROTO(stats_central_pool_ndirty) +CTL_PROTO(stats_central_pool_npurged) CTL_PROTO(stats_metadata) CTL_PROTO(stats_metadata_edata) CTL_PROTO(stats_metadata_rtree) @@ -486,6 +492,8 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("hpa_purge_threshold"), CTL(opt_hpa_purge_threshold)}, {NAME("hpa_min_purge_delay_ms"), CTL(opt_hpa_min_purge_delay_ms)}, {NAME("hpa_hugify_style"), CTL(opt_hpa_hugify_style)}, + {NAME("hpa_use_pool"), CTL(opt_hpa_use_pool)}, + {NAME("hpa_pool_purge_delay_ms"), CTL(opt_hpa_pool_purge_delay_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -795,6 +803,8 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, {NAME("nhugify_failures"), CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}, + {NAME("ndonated_ps"), CTL(stats_arenas_i_hpa_shard_ndonated_ps)}, + {NAME("nborrowed_ps"), CTL(stats_arenas_i_hpa_shard_nborrowed_ps)}, {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)}, {NAME("empty_slabs"), CHILD(named, stats_arenas_i_hpa_shard_empty_slabs)}, @@ -852,6 +862,10 @@ static const ctl_named_node_t stats_background_thread_node[] = { {NAME("num_runs"), CTL(stats_background_thread_num_runs)}, 
{NAME("run_interval"), CTL(stats_background_thread_run_interval)}}; +static const ctl_named_node_t stats_central_pool_node[] = { + {NAME("ndirty"), CTL(stats_central_pool_ndirty)}, + {NAME("npurged"), CTL(stats_central_pool_npurged)}}; + #define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx) MUTEX_PROF_GLOBAL_MUTEXES #undef OP @@ -881,6 +895,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("mutexes"), CHILD(named, stats_mutexes)}, {NAME("arenas"), CHILD(indexed, stats_arenas)}, {NAME("zero_reallocs"), CTL(stats_zero_reallocs)}, + {NAME("central_pool"), CHILD(named, stats_central_pool)}, }; static const ctl_named_node_t experimental_hooks_node[] = { @@ -1064,6 +1079,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { } } +static bool ctl_ever_used_central_pool(const hpa_shard_stats_t *hpastats); + static void ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { unsigned i; @@ -1077,6 +1094,14 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { ctl_arena->astats->lstats, ctl_arena->astats->estats, &ctl_arena->astats->hpastats); + /* Read central pool mutex stats for arena 0 only */ + if (ctl_arena->arena_ind == 0 && + ctl_ever_used_central_pool(&ctl_arena->astats->hpastats)) { + arena_stats_global_central_mutex_read(tsdn, + &ctl_arena->astats->astats + .mutex_prof_data[arena_prof_mutex_hpa_central_pool]); + } + for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = &ctl_arena->astats->bstats[i].stats_data; @@ -1311,6 +1336,17 @@ ctl_arena_init(tsd_t *tsd, const arena_config_t *config) { return arena_ind; } +static bool +ctl_ever_used_central_pool(const hpa_shard_stats_t *hpastats) { + return hpastats->nonderived_stats.ndonated_ps > 0 + || hpastats->nonderived_stats.nborrowed_ps > 0; +} + +static void +ctl_hpa_central_stats_read(tsdn_t *tsdn, hpa_central_stats_t *central_stats) { + arena_stats_global_central_read(tsdn, central_stats); +} + static void ctl_background_thread_stats_read(tsdn_t *tsdn) { 
background_thread_stats_t *stats = &ctl_stats->background_thread; @@ -1361,6 +1397,13 @@ ctl_refresh(tsdn_t *tsdn) { } if (config_stats) { + if (ctl_ever_used_central_pool(&ctl_sarena->astats->hpastats)) { + ctl_hpa_central_stats_read( + tsdn, &ctl_stats->hpa_central); + } else { + ctl_stats->hpa_central.npurged_pool = 0; + ctl_stats->hpa_central.ndirty_pool = 0; + } ctl_stats->allocated = ctl_sarena->astats->allocated_small + ctl_sarena->astats->astats.allocated_large; ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); @@ -1371,7 +1414,8 @@ ctl_refresh(tsdn_t *tsdn) { ctl_sarena->astats->astats.metadata_edata; ctl_stats->metadata_rtree = ctl_sarena->astats->astats.metadata_rtree; - ctl_stats->resident = ctl_sarena->astats->astats.resident; + ctl_stats->resident = ctl_sarena->astats->astats.resident + + ctl_stats->hpa_central.ndirty_pool; ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; ctl_stats->mapped = ctl_sarena->astats->astats.mapped; @@ -2172,6 +2216,9 @@ CTL_RO_NL_GEN( opt_hpa_min_purge_delay_ms, opt_hpa_opts.min_purge_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_hugify_style, hpa_hugify_style_names[opt_hpa_opts.hugify_style], const char *) +CTL_RO_NL_GEN(opt_hpa_use_pool, opt_hpa_opts.use_pool, bool) +CTL_RO_NL_GEN( + opt_hpa_pool_purge_delay_ms, opt_hpa_pool_purge_delay_ms, uint64_t) /* * This will have to change before we publicly document this option; fxp_t and * its representation are internal implementation details. 
@@ -3805,6 +3852,10 @@ approximate_stats_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, label_return: return ret; } +CTL_RO_CGEN(config_stats, stats_central_pool_ndirty, + ctl_stats->hpa_central.ndirty_pool, size_t) +CTL_RO_CGEN(config_stats, stats_central_pool_npurged, + ctl_stats->hpa_central.npurged_pool, uint64_t) CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) CTL_RO_GEN( @@ -4120,6 +4171,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugify_failures, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndonated_ps, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndonated_ps, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nborrowed_ps, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.nborrowed_ps, uint64_t); /* Full, nonhuge */ CTL_RO_CGEN(config_stats, diff --git a/src/hpa.c b/src/hpa.c index 7e5b5f7224..53c24cd9ce 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -109,6 +109,8 @@ hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central, shard->stats.nhugifies = 0; shard->stats.nhugify_failures = 0; shard->stats.ndehugifies = 0; + shard->stats.ndonated_ps = 0; + shard->stats.nborrowed_ps = 0; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -145,6 +147,8 @@ hpa_shard_nonderived_stats_accum( dst->nhugifies += src->nhugifies; dst->nhugify_failures += src->nhugify_failures; dst->ndehugifies += src->ndehugifies; + dst->ndonated_ps += src->ndonated_ps; + dst->nborrowed_ps += src->nborrowed_ps; } void @@ -285,6 +289,18 @@ hpa_assume_huge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { } } +static void +hpa_update_purgable_time(hpa_shard_t *shard, hpdata_t *ps) { + if (shard->opts.min_purge_delay_ms == 0) { + return; + } + nstime_t now; + uint64_t delayns = shard->opts.min_purge_delay_ms * 1000 * 1000; + 
shard->central->hooks.curtime(&now, /* first_reading */ true); + nstime_iadd(&now, delayns); + hpdata_time_purge_allowed_set(ps, &now); +} + static void hpa_update_purge_hugify_eligibility( tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { @@ -328,13 +344,8 @@ hpa_update_purge_hugify_eligibility( hpdata_allow_hugify(ps, now); } bool purgable = hpa_good_purge_candidate(shard, ps); - if (purgable && !hpdata_purge_allowed_get(ps) - && (shard->opts.min_purge_delay_ms > 0)) { - nstime_t now; - uint64_t delayns = shard->opts.min_purge_delay_ms * 1000 * 1000; - shard->central->hooks.curtime(&now, /* first_reading */ true); - nstime_iadd(&now, delayns); - hpdata_time_purge_allowed_set(ps, &now); + if (purgable && !hpdata_purge_allowed_get(ps)) { + hpa_update_purgable_time(shard, ps); } hpdata_purge_allowed_set(ps, purgable); @@ -449,6 +460,42 @@ hpa_purge_finish_hp( psset_update_end(&shard->psset, hp_item->hp); } +static void +hpa_donate_empty_ps(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (!shard->opts.use_pool) { + return; + } + + hpdata_empty_list_t to_donate; + hpdata_empty_list_init(&to_donate); + do { + hpdata_t *to_purge = (shard->opts.min_purge_delay_ms > 0) + ? psset_pick_purge( + &shard->psset, &shard->last_time_work_attempted) + : psset_pick_purge(&shard->psset, NULL); + + if (to_purge == NULL || !hpdata_empty(to_purge)) { + break; + } + assert(hpdata_ndirty_get(to_purge) > 0); + + /* Donate the page to the pool */ + psset_remove(&shard->psset, to_purge); + hpdata_empty_list_append(&to_donate, to_purge); + shard->stats.ndonated_ps++; + } while (true); + + if (!hpdata_empty_list_empty(&to_donate)) { + nstime_t now; + nstime_copy(&now, &shard->last_time_work_attempted); + malloc_mutex_unlock(tsdn, &shard->mtx); + hpa_central_ps_insert(tsdn, shard->central, &to_donate, + &shard->last_time_work_attempted); + malloc_mutex_lock(tsdn, &shard->mtx); + } +} + /* Returns number of huge pages purged. 
*/ static inline size_t hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { @@ -468,6 +515,8 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { }; assert(batch.range_watermark > 0); + hpa_donate_empty_ps(tsdn, shard); + while (1) { hpa_batch_pass_start(&batch); assert(hpa_batch_empty(&batch)); @@ -635,6 +684,17 @@ hpa_shard_maybe_do_deferred_work( max_purges = max_purge_nhp; } + if (shard->opts.use_pool) { + size_t max_pool_ops = (forced ? (size_t)-1 : 8); + hpa_central_t *central = shard->central; + nstime_t now; + nstime_copy(&now, &shard->last_time_work_attempted); + /* we do not need to hold shard lock when purging the central */ + malloc_mutex_unlock(tsdn, &shard->mtx); + hpa_central_purge(tsdn, central, &now, max_pool_ops); + malloc_mutex_lock(tsdn, &shard->mtx); + } + malloc_mutex_assert_owner(tsdn, &shard->mtx); nops += hpa_purge(tsdn, shard, max_purges); malloc_mutex_assert_owner(tsdn, &shard->mtx); @@ -650,6 +710,19 @@ hpa_shard_maybe_do_deferred_work( } } +static void +hpa_add_pool_page_to_psset(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { + assert(hpdata_alloc_allowed_get(ps) && hpdata_empty(ps) + && hpdata_consistent(ps)); + if (hpdata_purge_allowed_get(ps)) { + hpa_update_purgable_time(shard, ps); + if (hpdata_huge_get(ps)) { + shard->stats.nborrowed_ps++; + } + } + psset_insert(&shard->psset, ps); +} + static edata_t * hpa_try_alloc_one_no_grow( tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { @@ -663,6 +736,12 @@ hpa_try_alloc_one_no_grow( } hpdata_t *ps = psset_pick_alloc(&shard->psset, size); + if (ps == NULL && shard->opts.use_pool) { + ps = hpa_central_ps_pop(tsdn, shard->central); + if (ps != NULL) { + hpa_add_pool_page_to_psset(tsdn, shard, ps); + } + } if (ps == NULL) { edata_cache_fast_put(tsdn, &shard->ecf, edata); return NULL; diff --git a/src/hpa_central.c b/src/hpa_central.c index b4f770c2cb..281e265ea2 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -2,10 +2,138 @@ #include 
"jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa_central.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/hpa_utils.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" #define HPA_EDEN_SIZE (128 * HUGEPAGE) +#define MILLION UINT64_C(1000000) + +uint64_t opt_hpa_pool_purge_delay_ms = 10000; /* 10s */ + +void +hpa_central_pool_init(hpa_pool_t *pool) { + hpdata_empty_list_init(&pool->nonpurged); + hpdata_empty_list_init(&pool->purged); +} + +void +hpa_central_stats_read( + tsdn_t *tsdn, hpa_central_t *central, hpa_central_stats_t *stats) { + malloc_mutex_lock(tsdn, ¢ral->pool_mtx); + stats->ndirty_pool = central->stats.ndirty_pool; + stats->npurged_pool = central->stats.npurged_pool; + malloc_mutex_unlock(tsdn, ¢ral->pool_mtx); +} + +static inline void +hpa_central_pool_concat_nonpurged(tsdn_t *tsdn, hpa_central_t *central, + hpdata_empty_list_t *pages, size_t new_dirty) { + malloc_mutex_lock(tsdn, ¢ral->pool_mtx); + hpdata_empty_list_concat(¢ral->pool.nonpurged, pages); + central->stats.ndirty_pool += new_dirty; + malloc_mutex_unlock(tsdn, ¢ral->pool_mtx); +} + +static void +hpa_central_get_nonpurged(tsdn_t *tsdn, hpa_central_t *central, + const nstime_t *now, hpa_purge_batch_t *batch) { + malloc_mutex_lock(tsdn, ¢ral->pool_mtx); + while (!hpa_batch_full(batch) + && !hpdata_empty_list_empty(¢ral->pool.nonpurged)) { + hpdata_t *ps = hpdata_empty_list_first( + ¢ral->pool.nonpurged); + assert(hpdata_empty(ps) && hpdata_purge_allowed_get(ps)); + + const nstime_t *allowed = hpdata_time_purge_allowed_get(ps); + if (nstime_compare(now, allowed) < 0) { + break; + } + hpdata_empty_list_remove(¢ral->pool.nonpurged, ps); + assert(batch->item_cnt < batch->items_capacity); + hpa_purge_item_t *hp_item = &batch->items[batch->item_cnt]; + batch->item_cnt++; + hp_item->hp = ps; + hp_item->dehugify = hpdata_huge_get(hp_item->hp); + size_t nranges; + hpdata_alloc_allowed_set(hp_item->hp, false); + size_t 
ndirty = hpdata_purge_begin( + hp_item->hp, &hp_item->state, &nranges); + assert(ndirty > 0 && nranges > 0); + batch->ndirty_in_batch += ndirty; + batch->nranges += nranges; + batch->npurged_hp_total++; + } + malloc_mutex_unlock(tsdn, &central->pool_mtx); +} + +static void +hpa_central_put_purged( + tsdn_t *tsdn, hpa_central_t *central, const hpa_purge_batch_t *batch) { + assert(batch->item_cnt > 0); + hpdata_empty_list_t newly_purged; + hpdata_empty_list_init(&newly_purged); + + for (size_t i = 0; i < batch->item_cnt; ++i) { + hpa_purge_item_t *hp_item = &batch->items[i]; + if (hp_item->dehugify) { + hpdata_dehugify(hp_item->hp); + } + hpdata_purge_end(hp_item->hp, &hp_item->state); + hpdata_alloc_allowed_set(hp_item->hp, true); + hpdata_purge_allowed_set(hp_item->hp, false); + hpdata_empty_list_append(&newly_purged, hp_item->hp); + } + + malloc_mutex_lock(tsdn, &central->pool_mtx); + hpdata_empty_list_concat(&central->pool.purged, &newly_purged); + central->stats.npurged_pool += batch->npurged_hp_total; + assert(central->stats.ndirty_pool >= batch->ndirty_in_batch); + central->stats.ndirty_pool -= batch->ndirty_in_batch; + malloc_mutex_unlock(tsdn, &central->pool_mtx); +} + +void +hpa_central_ps_insert(tsdn_t *tsdn, hpa_central_t *central, + hpdata_empty_list_t *pages, const nstime_t *now) { + assert(!hpdata_empty_list_empty(pages)); + + assert(now != NULL); + nstime_t purge_time; + nstime_copy(&purge_time, now); + uint64_t purge_delay_ns = opt_hpa_pool_purge_delay_ms * MILLION; + nstime_iadd(&purge_time, purge_delay_ns); + + hpdata_t *ps; + size_t new_dirty = 0; + ql_foreach (ps, &pages->head, ql_link_empty) { + assert(hpdata_empty(ps)); + assert(hpdata_ndirty_get(ps) > 0); + hpdata_time_purge_allowed_set(ps, &purge_time); + new_dirty += hpdata_ndirty_get(ps); + } + hpa_central_pool_concat_nonpurged(tsdn, central, pages, new_dirty); +} + +hpdata_t * +hpa_central_ps_pop(tsdn_t *tsdn, hpa_central_t *central) { + hpdata_t *ps = NULL; + + malloc_mutex_lock(tsdn, &central->pool_mtx); + 
if (!hpdata_empty_list_empty(&central->pool.nonpurged)) { + ps = hpdata_empty_list_first(&central->pool.nonpurged); + hpdata_empty_list_remove(&central->pool.nonpurged, ps); + } + if (ps == NULL && !hpdata_empty_list_empty(&central->pool.purged)) { + ps = hpdata_empty_list_first(&central->pool.purged); + hpdata_empty_list_remove(&central->pool.purged, ps); + } + malloc_mutex_unlock(tsdn, &central->pool_mtx); + + return ps; +} bool hpa_central_init( @@ -19,10 +147,19 @@ hpa_central_init( return true; } + err = malloc_mutex_init(&central->pool_mtx, "hpa_central_pool", + WITNESS_RANK_HPA_CENTRAL_POOL, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + hpa_central_pool_init(&central->pool); + central->base = base; central->eden = NULL; central->eden_len = 0; central->hooks = *hooks; + central->stats.npurged_pool = 0; + central->stats.ndirty_pool = 0; return false; } @@ -119,3 +256,49 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, return ps; } + +size_t +hpa_central_purge( + tsdn_t *tsdn, hpa_central_t *central, const nstime_t *now, size_t max_ps) { + VARIABLE_ARRAY(hpa_purge_item_t, items, HPA_PURGE_BATCH_MAX); + hpa_purge_batch_t batch = { + .max_hp = max_ps, + .npurged_hp_total = 0, + .items = &items[0], + .items_capacity = HPA_PURGE_BATCH_MAX, + .range_watermark = hpa_process_madvise_max_iovec_len(), + }; + assert(batch.range_watermark > 0); + + do { + hpa_batch_pass_start(&batch); + assert(hpa_batch_empty(&batch)); + hpa_central_get_nonpurged(tsdn, central, now, &batch); + if (hpa_batch_empty(&batch)) { + break; + } + /* We don't need any lock while purging pages from the pool. */ + hpa_purge_batch(&central->hooks, batch.items, batch.item_cnt); + hpa_central_put_purged(tsdn, central, &batch); + } while (hpa_batch_full(&batch)); + return batch.npurged_hp_total; +} + +/* + * No need to do any of the below for central->grow_mtx as shard->grow_mtx must be + * held to lock that one. 
+ */ +void +hpa_central_prefork(tsdn_t *tsdn, hpa_central_t *central) { + malloc_mutex_prefork(tsdn, &central->pool_mtx); +} + +void +hpa_central_postfork_parent(tsdn_t *tsdn, hpa_central_t *central) { + malloc_mutex_postfork_parent(tsdn, &central->pool_mtx); +} + +void +hpa_central_postfork_child(tsdn_t *tsdn, hpa_central_t *central) { + malloc_mutex_postfork_child(tsdn, &central->pool_mtx); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 5d23962d67..6bc25e24ed 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1677,6 +1677,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + CONF_HANDLE_BOOL(opt_hpa_opts.use_pool, "hpa_use_pool"); + CONF_HANDLE_UINT64_T(opt_hpa_pool_purge_delay_ms, + "hpa_pool_purge_delay_ms", 0, UINT64_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { @@ -4516,6 +4520,7 @@ _malloc_prefork(void) } } } + arena_global_prefork(tsd_tsdn(tsd), opt_hpa); prof_prefork1(tsd_tsdn(tsd)); stats_prefork(tsd_tsdn(tsd)); tsd_prefork(tsd); @@ -4553,6 +4558,7 @@ _malloc_postfork(void) arena_postfork_parent(tsd_tsdn(tsd), arena); } } + arena_global_postfork_parent(tsd_tsdn(tsd), opt_hpa); prof_postfork_parent(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_parent(tsd_tsdn(tsd)); @@ -4583,6 +4589,7 @@ jemalloc_postfork_child(void) { arena_postfork_child(tsd_tsdn(tsd), arena); } } + arena_global_postfork_child(tsd_tsdn(tsd), opt_hpa); prof_postfork_child(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_child(tsd_tsdn(tsd)); diff --git a/src/stats.c b/src/stats.c index be70a6fcb0..848aae01ef 100644 --- a/src/stats.c +++ b/src/stats.c @@ -843,6 +843,9 @@ stats_arena_hpa_shard_counters_print( uint64_t nhugifies; uint64_t nhugify_failures; uint64_t ndehugifies; + uint64_t ndonated_ps; + uint64_t nborrowed_ps; + ; CTL_M2_GET( "stats.arenas.0.hpa_shard.npageslabs", i, &npageslabs, size_t); @@ -874,6 
+877,10 @@ stats_arena_hpa_shard_counters_print( &nhugify_failures, uint64_t); CTL_M2_GET( "stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t); + CTL_M2_GET( + "stats.arenas.0.hpa_shard.ndonated_ps", i, &ndonated_ps, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nborrowed_ps", i, &nborrowed_ps, + uint64_t); emitter_table_printf(emitter, "HPA shard stats:\n" @@ -891,6 +898,10 @@ stats_arena_hpa_shard_counters_print( " / sec)\n" " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Donated ps to pool: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Borrowed ps from the pool: %" FMTu64 " (%" FMTu64 + " / sec)\n" "\n", npageslabs, npageslabs_huge, npageslabs_nonhuge, nactive, nactive_huge, nactive_nonhuge, ndirty, ndirty_huge, ndirty_nonhuge, @@ -899,7 +910,9 @@ stats_arena_hpa_shard_counters_print( rate_per_second(npurges, uptime), nhugifies, rate_per_second(nhugifies, uptime), nhugify_failures, rate_per_second(nhugify_failures, uptime), ndehugifies, - rate_per_second(ndehugifies, uptime)); + rate_per_second(ndehugifies, uptime), ndonated_ps, + rate_per_second(ndonated_ps, uptime), nborrowed_ps, + rate_per_second(nborrowed_ps, uptime)); emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); @@ -913,6 +926,10 @@ stats_arena_hpa_shard_counters_print( &nhugify_failures); emitter_json_kv( emitter, "ndehugifies", emitter_type_uint64, &ndehugifies); + emitter_json_kv( + emitter, "ndonated_ps", emitter_type_uint64, &ndonated_ps); + emitter_json_kv( + emitter, "nborrowed_ps", emitter_type_uint64, &nborrowed_ps); emitter_json_object_kv_begin(emitter, "slabs"); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, @@ -1141,7 +1158,11 @@ stats_arena_mutexes_print( CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes"); for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; - i++) { + i++) { + /* hpa_central_pool is global, only print for arena 0 */ + if (i == 
arena_prof_mutex_hpa_central_pool && arena_ind != 0) { + continue; + } const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); mutex_stats_read_arena( @@ -1665,6 +1686,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_purge_threshold") OPT_WRITE_UINT64("hpa_min_purge_delay_ms") OPT_WRITE_CHAR_P("hpa_hugify_style") + OPT_WRITE_BOOL("hpa_use_pool") + OPT_WRITE_UINT64("hpa_pool_purge_delay_ms") OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") @@ -1870,7 +1893,9 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, metadata_thp, resident, mapped, retained; size_t num_background_threads; size_t zero_reallocs; + size_t ndirty_pool; uint64_t background_thread_num_runs, background_thread_run_interval; + uint64_t npurged_pool; CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); @@ -1883,6 +1908,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_GET("stats.retained", &retained, size_t); CTL_GET("stats.zero_reallocs", &zero_reallocs, size_t); + CTL_GET("stats.central_pool.ndirty", &ndirty_pool, size_t); + CTL_GET("stats.central_pool.npurged", &npurged_pool, uint64_t); if (have_background_thread) { CTL_GET("stats.background_thread.num_threads", @@ -1925,6 +1952,11 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_printf(emitter, "Count of realloc(non-null-ptr, 0) calls: %zu\n", zero_reallocs); + /* Central pool */ + emitter_table_printf(emitter, + "Central pool dirty: %zu, purged: %" FMTu64 "\n", ndirty_pool, + npurged_pool); + /* Background thread stats. 
*/ emitter_json_object_kv_begin(emitter, "background_thread"); emitter_json_kv( diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 9c4253cd28..ef558ddb4a 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -43,7 +43,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_delay_ms */ 0, /* hugify_style */ - hpa_hugify_style_lazy}; + hpa_hugify_style_lazy, + /* use_pool */ + false}; static hpa_shard_opts_t test_hpa_shard_opts_purge = { /* slab_max_alloc */ @@ -67,7 +69,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { /* min_purge_delay_ms */ 0, /* hugify_style */ - hpa_hugify_style_lazy}; + hpa_hugify_style_lazy, + /* use_pool */ + false}; static hpa_shard_opts_t test_hpa_shard_opts_aggressive = { /* slab_max_alloc */ @@ -91,7 +95,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_aggressive = { /* min_purge_delay_ms */ 10, /* hugify_style */ - hpa_hugify_style_eager}; + hpa_hugify_style_eager, + /* use_pool */ + false}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { diff --git a/test/unit/hpa_central_pool.c b/test/unit/hpa_central_pool.c new file mode 100644 index 0000000000..79fd22c226 --- /dev/null +++ b/test/unit/hpa_central_pool.c @@ -0,0 +1,329 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 +#define SHARD_IND2 112 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts_default = { + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* 
hugify_sync */ + false, + /* min_purge_interval_ms */ + 0, + /* experimental_max_purge_nhp */ + -1, + /* purge_threshold */ + HUGEPAGE, + /* min_purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_eager, + /* use_pool */ + true}; + +static hpa_shard_t * +create_test_data( + hpa_central_t *central, hpa_shard_opts_t *opts, unsigned int shard_ind) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ shard_ind, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, central, &test_data->emap, + test_data->base, &test_data->shard_edata_cache, shard_ind, opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static size_t npurge_size = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + npurge_size = size; + ++ndefer_purge_calls; +} + +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return false; +} + +static size_t ndefer_hugify_calls = 0; +static bool 
+defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +TEST_BEGIN(test_central_pool) { + test_skip_if(!hpa_supported() || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.purge_threshold = HUGEPAGE; + opts.min_purge_delay_ms = 0; + opts.min_purge_interval_ms = 0; + + hpa_central_t central; + base_t *central_base = base_new(TSDN_NULL, /* ind */ 1234, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(central_base, ""); + hpa_central_init(¢ral, central_base, &hooks); + ndefer_purge_calls = 0; + hpa_shard_t *shard1 = create_test_data(¢ral, &opts, SHARD_IND); + hpa_shard_t *shard2 = create_test_data(¢ral, &opts, SHARD_IND2); + + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard1->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Remember the page */ + hpdata_t *ps = 
psset_pick_alloc(&shard1->psset, PAGE); + expect_true(hpdata_huge_get(ps), "Should be huge as we start as huge"); + + /* Deallocate all */ + for (int i = 0; i < NALLOCS / 2; i++) { + pai_dalloc( + tsdn, &shard1->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard1); + expect_true(deferred_work_generated, ""); + expect_zu_eq( + 0, ndefer_purge_calls, "Should donate, not purge delay=0ms"); + + /* Stats should not include the page */ + expect_zu_eq(shard1->psset.stats.merged.nactive, 0, ""); + expect_zu_eq(shard1->psset.stats.merged.npageslabs, 0, "Non huge"); + npurge_size = 0; + + /* Make allocation on second shard */ + edata_t *edata2 = pai_alloc(tsdn, &shard2->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata2, "Unexpected null edata"); + expect_zu_eq(shard2->psset.stats.merged.nactive, 1, ""); + hpdata_t *ps2 = psset_pick_alloc(&shard2->psset, PAGE); + expect_ptr_eq( + ps, ps2, "Expected to get the same page via central pool"); + expect_true(hpdata_huge_get(ps2), "Should still be huge"); + + expect_zu_eq(shard2->psset.stats.merged.npageslabs, 1, ""); + pai_dalloc(tsdn, &shard2->pai, edata2, &deferred_work_generated); + expect_true(deferred_work_generated, ""); + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_do_deferred_work(tsdn, shard1); + expect_zu_eq(0, ndefer_purge_calls, "No purge, no donate, delay==0ms"); + hpa_shard_do_deferred_work(tsdn, shard2); + expect_zu_eq(0, ndefer_purge_calls, "No purge, yes donate, delay==0ms"); + + /* Move the time above hard coded limit of 10s */ + nstime_iadd(&defer_curtime, UINT64_C(30) * 1000 * 1000 * 1000); + hpa_shard_do_deferred_work(tsdn, shard2); + expect_zu_eq(1, ndefer_purge_calls, "Purged, delay==0ms"); + expect_zu_eq(HUGEPAGE, npurge_size, "Should purge full folio"); + expect_zu_eq(shard1->psset.stats.merged.npageslabs, 0, ""); + expect_zu_eq(shard2->psset.stats.merged.npageslabs, 0, ""); + /* now alloc again and still get the 
same page */ + edata2 = pai_alloc(tsdn, &shard2->pai, PAGE, PAGE, false, false, false, + &deferred_work_generated); + expect_ptr_not_null(edata2, "Unexpected null edata"); + expect_zu_eq(shard2->psset.stats.merged.nactive, 1, ""); + ps2 = psset_pick_alloc(&shard2->psset, PAGE); + expect_ptr_eq( + ps, ps2, "Expected to get the same page via central pool"); + expect_zu_eq(shard2->psset.stats.merged.npageslabs, 1, ""); + pai_dalloc(tsdn, &shard2->pai, edata2, &deferred_work_generated); + + npurge_size = 0; + ndefer_purge_calls = 0; + destroy_test_data(shard1); + destroy_test_data(shard2); + base_delete(TSDN_NULL, central_base); +} +TEST_END + +TEST_BEGIN(test_central_pool_with_delay) { + test_skip_if(!hpa_supported() || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.purge_threshold = HUGEPAGE; + opts.min_purge_delay_ms = 1000; + opts.min_purge_interval_ms = 0; + + hpa_central_t central; + base_t *central_base = base_new(TSDN_NULL, /* ind */ 1234, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(central_base, ""); + hpa_central_init(¢ral, central_base, &hooks); + ndefer_purge_calls = 0; + hpa_shard_t *shard1 = create_test_data(¢ral, &opts, SHARD_IND); + hpa_shard_t *shard2 = create_test_data(¢ral, &opts, SHARD_IND2); + + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard1->pai, PAGE, PAGE, false, + false, false, 
&deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Remember the page */ + hpdata_t *ps = psset_pick_alloc(&shard1->psset, PAGE); + expect_true(hpdata_huge_get(ps), "Should be huge as we start as huge"); + + /* Deallocate all */ + for (int i = 0; i < NALLOCS / 2; i++) { + pai_dalloc( + tsdn, &shard1->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard1); + expect_true(deferred_work_generated, ""); + expect_zu_eq(0, ndefer_purge_calls, "No purge, no donation delay=0ms"); + + /* Stats should include the page */ + expect_zu_eq(shard1->psset.stats.merged.nactive, 0, ""); + expect_zu_eq(shard1->psset.stats.merged.npageslabs, 1, ""); + + /* One more second passed */ + nstime_iadd(&defer_curtime, UINT64_C(1000) * 1000 * 1000); + hpa_shard_do_deferred_work(tsdn, shard1); + expect_zu_eq(0, ndefer_purge_calls, "No purge, donation"); + /* Stats should not include the page */ + expect_zu_eq(shard1->psset.stats.merged.nactive, 0, ""); + expect_zu_eq(shard1->psset.stats.merged.npageslabs, 0, ""); + /* Make allocation on second shard */ + edata_t *edata2 = pai_alloc(tsdn, &shard2->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata2, "Unexpected null edata"); + expect_zu_eq(shard2->psset.stats.merged.nactive, 1, ""); + hpdata_t *ps2 = psset_pick_alloc(&shard2->psset, PAGE); + expect_ptr_eq( + ps, ps2, "Expected to get the same page via central pool"); + expect_true(hpdata_huge_get(ps2), "Should still be huge"); + expect_zu_eq(shard2->psset.stats.merged.npageslabs, 1, ""); + + npurge_size = 0; + ndefer_purge_calls = 0; + destroy_test_data(shard1); + destroy_test_data(shard2); + base_delete(TSDN_NULL, central_base); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_central_pool, test_central_pool_with_delay); +} diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index 2121de49af..6566089a82 100644 --- 
a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -43,7 +43,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* purge_delay_ms */ 0, /* hugify_style */ - hpa_hugify_style_lazy}; + hpa_hugify_style_lazy, + /* use_pool */ + false}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index e92988dec4..296f06a8e1 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -44,7 +44,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_delay_ms */ 0, /* hugify_style */ - hpa_hugify_style_lazy}; + hpa_hugify_style_lazy, + /* use_pool */ + false}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4c11e4857d..b6d2107190 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -318,6 +318,8 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always); TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always); TEST_MALLCTL_OPT(const char *, hpa_hugify_style, always); + TEST_MALLCTL_OPT(bool, hpa_use_pool, always); + TEST_MALLCTL_OPT(uint64_t, hpa_pool_purge_delay_ms, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); @@ -1076,6 +1078,8 @@ TEST_BEGIN(test_stats_arenas_hpa_shard_counters) { TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, npurges); TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, nhugifies); TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, ndehugifies); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, ndonated_ps); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, nborrowed_ps); #undef TEST_STATS_ARENAS_HPA_SHARD_COUNTERS } From 
ba01d0409cc47dfa31b72740c3aa6549aaaa0c9a Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 9 Dec 2025 18:08:29 -0800 Subject: [PATCH 2/2] Immediate sharing of a page --- include/jemalloc/internal/arena_externs.h | 4 +- include/jemalloc/internal/hpa_central.h | 14 ++- src/arena.c | 22 +++- src/background_thread.c | 20 +++- src/hpa.c | 95 +++++++++-------- src/hpa_central.c | 97 ++++++++++++----- test/unit/hpa_central_pool.c | 120 ++++------------------ test/unit/hpa_central_pool.sh | 3 + 8 files changed, 190 insertions(+), 185 deletions(-) create mode 100644 test/unit/hpa_central_pool.sh diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index da55e646c5..cef41c9fba 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -46,7 +46,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, - hpa_shard_stats_t *hpastats, sec_stats_t *secstats); + hpa_shard_stats_t *hpastats); void arena_stats_global_central_read(tsdn_t *tsdn, hpa_central_stats_t *stats); void arena_stats_global_central_mutex_read( tsdn_t *tsdn, mutex_prof_data_t *mutex_prof_data); @@ -67,6 +67,8 @@ void arena_decay( tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); +void arena_central_do_deferred_work(tsdn_t *tsdn); +uint64_t arena_central_time_until_deferred_work(tsdn_t *tsdn); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/hpa_central.h 
b/include/jemalloc/internal/hpa_central.h index 947c5463ed..16f0756ea4 100644 --- a/include/jemalloc/internal/hpa_central.h +++ b/include/jemalloc/internal/hpa_central.h @@ -14,7 +14,7 @@ struct hpa_pool_s { * Pool of empty huge pages to be shared between shards that are * participating. * - * Page is owned by the pool if it lives in one of these two lists. + * Page is owned by the pool if it lives in one of these two lists. * This means that it should not be part of any hpa_shard's psset at the * same time. */ @@ -68,16 +68,20 @@ bool hpa_central_init( hpdata_t *hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, uint64_t age, bool hugify_eager, bool *oom); -/* Donate empty pages to central */ -void hpa_central_ps_insert(tsdn_t *tsdn, hpa_central_t *central, - hpdata_empty_list_t *pages, const nstime_t *now); +/* Donate empty page to central */ +void hpa_central_donate( + tsdn_t *tsdn, hpa_central_t *central, hpdata_t *ps, const nstime_t *now); /* Get empty page from central without growing it */ -hpdata_t *hpa_central_ps_pop(tsdn_t *tsdn, hpa_central_t *central); +hpdata_t *hpa_central_borrow(tsdn_t *tsdn, hpa_central_t *central); /* Purge up to max_ps empty pages in the central */ size_t hpa_central_purge( tsdn_t *tsdn, hpa_central_t *central, const nstime_t *now, size_t max_ps); +/* Get time in nanoseconds until central pool needs deferred work */ +uint64_t hpa_central_time_until_deferred_work( + tsdn_t *tsdn, hpa_central_t *central); + void hpa_central_prefork(tsdn_t *tsdn, hpa_central_t *central); void hpa_central_postfork_parent(tsdn_t *tsdn, hpa_central_t *central); void hpa_central_postfork_child(tsdn_t *tsdn, hpa_central_t *central); diff --git a/src/arena.c b/src/arena.c index 261dbbd17c..5ee5f407ef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -227,7 +227,6 @@ arena_stats_global_central_mutex_read( malloc_mutex_unlock(tsdn, &arena_pa_central_global.hpa.pool_mtx); } - static void arena_background_thread_inactivity_check( tsdn_t *tsdn, 
arena_t *arena, bool is_background_thread) { @@ -628,6 +627,27 @@ arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { pa_shard_do_deferred_work(tsdn, &arena->pa_shard); } +/* Called from background threads to purge central pool. */ +void +arena_central_do_deferred_work(tsdn_t *tsdn) { + if (arena_pa_central_global.hpa.base == NULL) { + return; + } + nstime_t now; + arena_pa_central_global.hpa.hooks.curtime( + &now, /* first_reading */ true); + hpa_central_purge(tsdn, &arena_pa_central_global.hpa, &now, SIZE_MAX); +} + +uint64_t +arena_central_time_until_deferred_work(tsdn_t *tsdn) { + if (arena_pa_central_global.hpa.base == NULL) { + return UINT64_MAX; + } + return hpa_central_time_until_deferred_work( + tsdn, &arena_pa_central_global.hpa); +} + void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { bool deferred_work_generated = false; diff --git a/src/background_thread.c b/src/background_thread.c index 4901856a82..75826ed2bc 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -87,7 +87,9 @@ pthread_create_fptr_init(void) { #ifndef JEMALLOC_BACKGROUND_THREAD # define NOT_REACHED \ - { not_reached(); } + { \ + not_reached(); \ + } bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED @@ -280,6 +282,22 @@ background_work_sleep_once( } } + /* + * Handle central pool (shared across all arenas). + * Multiple background threads may call this concurrently; + * hpa_central functions handle synchronization internally. 
+ */ + if (!slept_indefinitely) { + arena_central_do_deferred_work(tsdn); + } + if (ns_until_deferred > BACKGROUND_THREAD_MIN_INTERVAL_NS) { + uint64_t ns_central_deferred = + arena_central_time_until_deferred_work(tsdn); + if (ns_central_deferred < ns_until_deferred) { + ns_until_deferred = ns_central_deferred; + } + } + uint64_t sleep_ns; if (ns_until_deferred == BACKGROUND_THREAD_DEFERRED_MAX) { sleep_ns = BACKGROUND_THREAD_INDEFINITE_SLEEP; diff --git a/src/hpa.c b/src/hpa.c index 53c24cd9ce..d3d0f2235f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -460,42 +460,6 @@ hpa_purge_finish_hp( psset_update_end(&shard->psset, hp_item->hp); } -static void -hpa_donate_empty_ps(tsdn_t *tsdn, hpa_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - if (!shard->opts.use_pool) { - return; - } - - hpdata_empty_list_t to_donate; - hpdata_empty_list_init(&to_donate); - do { - hpdata_t *to_purge = (shard->opts.min_purge_delay_ms > 0) - ? psset_pick_purge( - &shard->psset, &shard->last_time_work_attempted) - : psset_pick_purge(&shard->psset, NULL); - - if (to_purge == NULL || !hpdata_empty(to_purge)) { - break; - } - assert(hpdata_ndirty_get(to_purge) > 0); - - /* Donate the page to the pool */ - psset_remove(&shard->psset, to_purge); - hpdata_empty_list_append(&to_donate, to_purge); - shard->stats.ndonated_ps++; - } while (true); - - if (!hpdata_empty_list_empty(&to_donate)) { - nstime_t now; - nstime_copy(&now, &shard->last_time_work_attempted); - malloc_mutex_unlock(tsdn, &shard->mtx); - hpa_central_ps_insert(tsdn, shard->central, &to_donate, - &shard->last_time_work_attempted); - malloc_mutex_lock(tsdn, &shard->mtx); - } -} - /* Returns number of huge pages purged. 
*/ static inline size_t hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { @@ -515,8 +479,6 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { }; assert(batch.range_watermark > 0); - hpa_donate_empty_ps(tsdn, shard); - while (1) { hpa_batch_pass_start(&batch); assert(hpa_batch_empty(&batch)); @@ -684,7 +646,7 @@ hpa_shard_maybe_do_deferred_work( max_purges = max_purge_nhp; } - if (shard->opts.use_pool) { + if (shard->opts.use_pool && !shard->opts.deferral_allowed) { size_t max_pool_ops = (forced ? (size_t)-1 : 8); hpa_central_t *central = shard->central; nstime_t now; @@ -711,16 +673,49 @@ hpa_shard_maybe_do_deferred_work( } static void -hpa_add_pool_page_to_psset(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { - assert(hpdata_alloc_allowed_get(ps) && hpdata_empty(ps) - && hpdata_consistent(ps)); - if (hpdata_purge_allowed_get(ps)) { +hpa_borrow_page_from_pool(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + hpdata_assert_consistent(ps); + assert(hpdata_empty(ps)); + assert(!hpdata_updating_get(ps)); + assert(!hpdata_changing_state_get(ps)); + assert(hpdata_alloc_allowed_get(ps)); + + /* Page will be purgeable per this shard's rules. 
*/ + bool purgable = hpa_good_purge_candidate(shard, ps); + hpdata_purge_allowed_set(ps, purgable); + if (purgable) { hpa_update_purgable_time(shard, ps); - if (hpdata_huge_get(ps)) { - shard->stats.nborrowed_ps++; - } } + /* Page is empty ensure we do not add to hugify list */ + assert(!hpdata_hugify_allowed_get(ps)); + shard->stats.nborrowed_ps++; psset_insert(&shard->psset, ps); + hpdata_assert_consistent(ps); +} + +static void +hpa_donate_ps_to_pool(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + + nstime_t now; + shard->central->hooks.curtime(&now, /* first_reading */ true); + + hpdata_assert_consistent(ps); + assert(hpdata_empty(ps)); + assert(!hpdata_updating_get(ps)); + assert(!hpdata_changing_state_get(ps)); + assert(hpdata_alloc_allowed_get(ps)); + assert(hpdata_ndirty_get(ps) > 0); + assert(!hpdata_hugify_allowed_get(ps)); + + psset_remove(&shard->psset, ps); + shard->stats.ndonated_ps++; + hpdata_assert_consistent(ps); + + malloc_mutex_unlock(tsdn, &shard->mtx); + hpa_central_donate(tsdn, shard->central, ps, &now); + malloc_mutex_lock(tsdn, &shard->mtx); } static edata_t * @@ -737,9 +732,9 @@ hpa_try_alloc_one_no_grow( hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL && shard->opts.use_pool) { - ps = hpa_central_ps_pop(tsdn, shard->central); + ps = hpa_central_borrow(tsdn, shard->central); if (ps != NULL) { - hpa_add_pool_page_to_psset(tsdn, shard, ps); + hpa_borrow_page_from_pool(tsdn, shard, ps); } } if (ps == NULL) { @@ -1058,6 +1053,10 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { hpdata_nactive_get(ps), hpdata_age_get(ps)); hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); + if (hpdata_empty(ps) && shard->opts.use_pool + && !hpdata_changing_state_get(ps)) { + hpa_donate_ps_to_pool(tsdn, shard, ps); + } } static void diff --git a/src/hpa_central.c b/src/hpa_central.c index 281e265ea2..75b47a3d4b 100644 --- 
a/src/hpa_central.c +++ b/src/hpa_central.c @@ -28,23 +28,13 @@ hpa_central_stats_read( malloc_mutex_unlock(tsdn, ¢ral->pool_mtx); } -static inline void -hpa_central_pool_concat_nonpurged(tsdn_t *tsdn, hpa_central_t *central, - hpdata_empty_list_t *pages, size_t new_dirty) { - malloc_mutex_lock(tsdn, ¢ral->pool_mtx); - hpdata_empty_list_concat(¢ral->pool.nonpurged, pages); - central->stats.ndirty_pool += new_dirty; - malloc_mutex_unlock(tsdn, ¢ral->pool_mtx); -} - static void hpa_central_get_nonpurged(tsdn_t *tsdn, hpa_central_t *central, const nstime_t *now, hpa_purge_batch_t *batch) { malloc_mutex_lock(tsdn, ¢ral->pool_mtx); while (!hpa_batch_full(batch) && !hpdata_empty_list_empty(¢ral->pool.nonpurged)) { - hpdata_t *ps = hpdata_empty_list_first( - ¢ral->pool.nonpurged); + hpdata_t *ps = hpdata_empty_list_last(¢ral->pool.nonpurged); assert(hpdata_empty(ps) && hpdata_purge_allowed_get(ps)); const nstime_t *allowed = hpdata_time_purge_allowed_get(ps); @@ -56,7 +46,8 @@ hpa_central_get_nonpurged(tsdn_t *tsdn, hpa_central_t *central, hpa_purge_item_t *hp_item = &batch->items[batch->item_cnt]; batch->item_cnt++; hp_item->hp = ps; - hp_item->dehugify = hpdata_huge_get(hp_item->hp); + /* We only deal with empty pages in the pool */ + hp_item->dehugify = false; size_t nranges; hpdata_alloc_allowed_set(hp_item->hp, false); size_t ndirty = hpdata_purge_begin( @@ -78,8 +69,11 @@ hpa_central_put_purged( for (size_t i = 0; i < batch->item_cnt; ++i) { hpa_purge_item_t *hp_item = &batch->items[i]; - if (hp_item->dehugify) { + /* Page was empty, so we just change the flag after purging */ + if (hpdata_huge_get(hp_item->hp)) { hpdata_dehugify(hp_item->hp); + hpdata_purged_when_empty_and_huge_set( + hp_item->hp, true); } hpdata_purge_end(hp_item->hp, &hp_item->state); hpdata_alloc_allowed_set(hp_item->hp, true); @@ -96,33 +90,50 @@ hpa_central_put_purged( } void -hpa_central_ps_insert(tsdn_t *tsdn, hpa_central_t *central, - hpdata_empty_list_t *pages, const nstime_t *now) { - 
assert(!hpdata_empty_list_empty(pages)); - + assert(now != NULL); nstime_t purge_time; nstime_copy(&purge_time, now); uint64_t purge_delay_ns = opt_hpa_pool_purge_delay_ms * MILLION; nstime_iadd(&purge_time, purge_delay_ns); - - hpdata_t *ps; - size_t new_dirty = 0; - ql_foreach (ps, &pages->head, ql_link_empty) { - assert(hpdata_empty(ps)); - assert(hpdata_ndirty_get(ps) > 0); - hpdata_time_purge_allowed_set(ps, &purge_time); - new_dirty += hpdata_ndirty_get(ps); - } - hpa_central_pool_concat_nonpurged(tsdn, central, pages, new_dirty); + assert(hpdata_empty(ps)); + assert(hpdata_ndirty_get(ps) > 0); + /* + * Central pool purge policy: We expect to receive pages with ndirty > 0 + * from shards. Regardless of the source shard's purge settings + * (including dirty_mult=-1), donated pages are marked as purgeable and + * will be purged after hpa_pool_purge_delay_ms milliseconds. This + * allows the central pool to reclaim memory independently of individual + * shard policies. + */ + hpdata_purge_allowed_set(ps, true); + hpdata_time_purge_allowed_set(ps, &purge_time); + size_t new_dirty = hpdata_ndirty_get(ps); + malloc_mutex_lock(tsdn, &central->pool_mtx); + central->stats.ndirty_pool += new_dirty; + /* + * Insert at the head; borrowing also takes from the head (LIFO), so + * the most recently donated pages are reused first, which gives better + * cache locality. Older pages accumulate at the tail, and the purge + * path drains the list from the tail (FIFO), purging the oldest first. + */ + hpdata_empty_list_prepend(&central->pool.nonpurged, ps); + malloc_mutex_unlock(tsdn, &central->pool_mtx); } hpdata_t * -hpa_central_ps_pop(tsdn_t *tsdn, hpa_central_t *central) { +hpa_central_borrow(tsdn_t *tsdn, hpa_central_t *central) { hpdata_t *ps = NULL; malloc_mutex_lock(tsdn, &central->pool_mtx); + /* + * Prefer non-purged pages over purged ones.
 Non-purged pages are cheaper + * to use (no need to fault pages back in) and allow purged pages to + * remain as a reserve for when the pool is under pressure. + */ if (!hpdata_empty_list_empty(&central->pool.nonpurged)) { + /* Take from the head (LIFO) - gets the most recently donated page. */ ps = hpdata_empty_list_first(&central->pool.nonpurged); hpdata_empty_list_remove(&central->pool.nonpurged, ps); } @@ -284,6 +295,36 @@ hpa_central_purge( return batch.npurged_hp_total; } +uint64_t +hpa_central_time_until_deferred_work(tsdn_t *tsdn, hpa_central_t *central) { + nstime_t purge_allowed; + nstime_init_zero(&purge_allowed); + + malloc_mutex_lock(tsdn, &central->pool_mtx); + if (!hpdata_empty_list_empty(&central->pool.nonpurged)) { + /* Get the last element (oldest in terms of insertion order) */ + hpdata_t *ps = hpdata_empty_list_last(&central->pool.nonpurged); + nstime_copy(&purge_allowed, hpdata_time_purge_allowed_get(ps)); + } + malloc_mutex_unlock(tsdn, &central->pool_mtx); + + if (nstime_equals_zero(&purge_allowed)) { + /* No pages to purge */ + return BACKGROUND_THREAD_DEFERRED_MAX; + } + + nstime_t now; + central->hooks.curtime(&now, /* first_reading */ true); + + if (nstime_compare(&purge_allowed, &now) <= 0) { + /* Already ready for purging */ + return BACKGROUND_THREAD_DEFERRED_MIN; + } + + /* Return nanoseconds until purge is allowed */ + return nstime_ns_between(&now, &purge_allowed); +} + /* *No need to do any of below for central->grow_mtx as shard->grow_mtx must be * held to lock that one.
diff --git a/test/unit/hpa_central_pool.c b/test/unit/hpa_central_pool.c index 79fd22c226..938153be24 100644 --- a/test/unit/hpa_central_pool.c +++ b/test/unit/hpa_central_pool.c @@ -67,8 +67,11 @@ create_test_data( err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); - err = hpa_shard_init(&test_data->shard, central, &test_data->emap, - test_data->base, &test_data->shard_edata_cache, shard_ind, opts); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + sec_opts_t sec_opts; + sec_opts.nshards = 0; + err = hpa_shard_init(tsdn, &test_data->shard, central, &test_data->emap, + test_data->base, &test_data->shard_edata_cache, shard_ind, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -177,7 +180,7 @@ TEST_BEGIN(test_central_pool) { expect_ptr_not_null(edatas[i], "Unexpected null edata"); } /* Remember the page */ - hpdata_t *ps = psset_pick_alloc(&shard1->psset, PAGE); + hpdata_t *ps = edata_ps_get(edatas[0]); expect_true(hpdata_huge_get(ps), "Should be huge as we start as huge"); /* Deallocate all */ @@ -186,9 +189,8 @@ TEST_BEGIN(test_central_pool) { tsdn, &shard1->pai, edatas[i], &deferred_work_generated); } hpa_shard_do_deferred_work(tsdn, shard1); - expect_true(deferred_work_generated, ""); - expect_zu_eq( - 0, ndefer_purge_calls, "Should donate, not purge delay=0ms"); + expect_false(deferred_work_generated, "Page was donated on dalloc"); + expect_zu_eq(0, ndefer_purge_calls, "Should donate, not purge"); /* Stats should not include the page */ expect_zu_eq(shard1->psset.stats.merged.nactive, 0, ""); @@ -200,25 +202,24 @@ TEST_BEGIN(test_central_pool) { false, false, &deferred_work_generated); expect_ptr_not_null(edata2, "Unexpected null edata"); expect_zu_eq(shard2->psset.stats.merged.nactive, 1, ""); - hpdata_t *ps2 = psset_pick_alloc(&shard2->psset, PAGE); - expect_ptr_eq( - ps, ps2, "Expected to get the same page via central pool"); + hpdata_t *ps2 = edata_ps_get(edata2); + expect_ptr_eq(ps, ps2, 
"Expected to get the same page via pool"); expect_true(hpdata_huge_get(ps2), "Should still be huge"); expect_zu_eq(shard2->psset.stats.merged.npageslabs, 1, ""); pai_dalloc(tsdn, &shard2->pai, edata2, &deferred_work_generated); - expect_true(deferred_work_generated, ""); + expect_false(deferred_work_generated, "Page donated to the pool"); ndefer_purge_calls = 0; npurge_size = 0; hpa_shard_do_deferred_work(tsdn, shard1); - expect_zu_eq(0, ndefer_purge_calls, "No purge, no donate, delay==0ms"); + expect_zu_eq(0, ndefer_purge_calls, "Empty shard"); hpa_shard_do_deferred_work(tsdn, shard2); - expect_zu_eq(0, ndefer_purge_calls, "No purge, yes donate, delay==0ms"); + expect_zu_eq(0, ndefer_purge_calls, "Empty shard"); - /* Move the time above hard coded limit of 10s */ + /* Move the time above limit of 10s we passed in MALLOC_CONF */ nstime_iadd(&defer_curtime, UINT64_C(30) * 1000 * 1000 * 1000); - hpa_shard_do_deferred_work(tsdn, shard2); - expect_zu_eq(1, ndefer_purge_calls, "Purged, delay==0ms"); + hpa_central_purge(tsdn, &central, &defer_curtime, SIZE_MAX); + expect_zu_eq(1, ndefer_purge_calls, "Purged, delay==10ms"); expect_zu_eq(HUGEPAGE, npurge_size, "Should purge full folio"); expect_zu_eq(shard1->psset.stats.merged.npageslabs, 0, ""); expect_zu_eq(shard2->psset.stats.merged.npageslabs, 0, ""); @@ -227,9 +228,8 @@ &deferred_work_generated); expect_ptr_not_null(edata2, "Unexpected null edata"); expect_zu_eq(shard2->psset.stats.merged.nactive, 1, ""); - ps2 = psset_pick_alloc(&shard2->psset, PAGE); - expect_ptr_eq( - ps, ps2, "Expected to get the same page via central pool"); + ps2 = edata_ps_get(edata2); + expect_ptr_eq(ps, ps2, "Expected to get the same page via pool"); expect_zu_eq(shard2->psset.stats.merged.npageslabs, 1, ""); pai_dalloc(tsdn, &shard2->pai, edata2, &deferred_work_generated); @@ -241,89 +241,7 @@ } TEST_END -TEST_BEGIN(test_central_pool_with_delay) { - test_skip_if(!hpa_supported() ||
!config_stats); - - hpa_hooks_t hooks; - hooks.map = &defer_test_map; - hooks.unmap = &defer_test_unmap; - hooks.purge = &defer_test_purge; - hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; - hooks.curtime = &defer_test_curtime; - hooks.ms_since = &defer_test_ms_since; - hooks.vectorized_purge = &defer_vectorized_purge; - - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.deferral_allowed = true; - opts.purge_threshold = HUGEPAGE; - opts.min_purge_delay_ms = 1000; - opts.min_purge_interval_ms = 0; - - hpa_central_t central; - base_t *central_base = base_new(TSDN_NULL, /* ind */ 1234, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); - assert_ptr_not_null(central_base, ""); - hpa_central_init(&central, central_base, &hooks); - ndefer_purge_calls = 0; - hpa_shard_t *shard1 = create_test_data(&central, &opts, SHARD_IND); - hpa_shard_t *shard2 = create_test_data(&central, &opts, SHARD_IND2); - - bool deferred_work_generated = false; - nstime_init(&defer_curtime, 10 * 1000 * 1000); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum { NALLOCS = HUGEPAGE_PAGES }; - edata_t *edatas[NALLOCS]; - for (int i = 0; i < NALLOCS / 2; i++) { - edatas[i] = pai_alloc(tsdn, &shard1->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edatas[i], "Unexpected null edata"); - } - /* Remember the page */ - hpdata_t *ps = psset_pick_alloc(&shard1->psset, PAGE); - expect_true(hpdata_huge_get(ps), "Should be huge as we start as huge"); - - /* Deallocate all */ - for (int i = 0; i < NALLOCS / 2; i++) { - pai_dalloc( - tsdn, &shard1->pai, edatas[i], &deferred_work_generated); - } - hpa_shard_do_deferred_work(tsdn, shard1); - expect_true(deferred_work_generated, ""); - expect_zu_eq(0, ndefer_purge_calls, "No purge, no donation delay=0ms"); - - /* Stats should include the page */ - expect_zu_eq(shard1->psset.stats.merged.nactive, 0, ""); - expect_zu_eq(shard1->psset.stats.merged.npageslabs, 1, ""); - - /* One more second
passed */ - nstime_iadd(&defer_curtime, UINT64_C(1000) * 1000 * 1000); - hpa_shard_do_deferred_work(tsdn, shard1); - expect_zu_eq(0, ndefer_purge_calls, "No purge, donation"); - /* Stats should not include the page */ - expect_zu_eq(shard1->psset.stats.merged.nactive, 0, ""); - expect_zu_eq(shard1->psset.stats.merged.npageslabs, 0, ""); - /* Make allocation on second shard */ - edata_t *edata2 = pai_alloc(tsdn, &shard2->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edata2, "Unexpected null edata"); - expect_zu_eq(shard2->psset.stats.merged.nactive, 1, ""); - hpdata_t *ps2 = psset_pick_alloc(&shard2->psset, PAGE); - expect_ptr_eq( - ps, ps2, "Expected to get the same page via central pool"); - expect_true(hpdata_huge_get(ps2), "Should still be huge"); - expect_zu_eq(shard2->psset.stats.merged.npageslabs, 1, ""); - - npurge_size = 0; - ndefer_purge_calls = 0; - destroy_test_data(shard1); - destroy_test_data(shard2); - base_delete(TSDN_NULL, central_base); -} -TEST_END - int main(void) { - return test_no_reentrancy( - test_central_pool, test_central_pool_with_delay); + return test_no_reentrancy(test_central_pool); } diff --git a/test/unit/hpa_central_pool.sh b/test/unit/hpa_central_pool.sh new file mode 100644 index 0000000000..61e97a6adb --- /dev/null +++ b/test/unit/hpa_central_pool.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="hpa:true,hpa_use_pool:true,hpa_pool_purge_delay_ms:10000"