From fabf105744090defe7d40e7278f1912452c1e977 Mon Sep 17 00:00:00 2001 From: Dmitry Khalanskiy Date: Thu, 11 Dec 2025 13:56:05 +0100 Subject: [PATCH 1/3] Attempt to fix a flaky coroutine-dump-verifying test Fixes #4418 (unless it keeps happening) This problem couldn't be reproduced locally, to this fix is purely analytical. The problematic test attempts to launch a coroutine then await until the coroutine suspends. The way it was doing that before the change is: - Hold a monitor and `wait` on the test body side; - Acquire a monitor and `notify` on the coroutine side *right before* the suspension point; - On the test body side, wait for the coroutine thread to enter the `TIMED_WAIT` state, indicating that its scheduler worker has finished its piece of work and now waits for new commands, which must mean the suspension point was reached. The problem is that thread states are not synchronization primitives, and no happens-before is established between the code a thread executes before the state change and the code right after the state change is observed. With this change, we establish a complete happens-before chain: - The test body wakes up after it's `resume`d as a coroutine. - `complete` on a latch happens-before the `resume`. - The suspension happens-before the `complete`, as suspension and the `complete` are done in the same thread. With no way to verify the fix, it's unclear if that was the problem, so we can only hope the change helps. --- .../test/CoroutinesDumpTest.kt | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt b/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt index f1b9a6d498..b795f52470 100644 --- a/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt +++ b/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt @@ -2,8 +2,6 @@ package kotlinx.coroutines.debug import kotlinx.coroutines.testing.* import kotlinx.coroutines.* -import org.junit.* -import org.junit.Test import kotlin.coroutines.* import kotlin.test.* @@ -11,7 +9,7 @@ class CoroutinesDumpTest : DebugTestBase() { private val monitor = Any() private var coroutineThread: Thread? = null // guarded by monitor - @Before + @BeforeTest override fun setUp() { super.setUp() DebugProbes.enableCreationStackTraces = true @@ -19,11 +17,13 @@ class CoroutinesDumpTest : DebugTestBase() { @Test fun testSuspendedCoroutine() = runBlocking { - val deferred = async(Dispatchers.Default) { - sleepingOuterMethod() + val latch = CompletableDeferred() + val singleThreadedDispatcher = newSingleThreadContext("TestCoroutineThread") + val deferred = async(singleThreadedDispatcher) { + sleepingOuterMethod(singleThreadedDispatcher, latch) } - awaitCoroutine() + latch.await() val found = DebugProbes.dumpCoroutinesInfo().single { it.job === deferred } verifyDump( "Coroutine \"coroutine#1\":DeferredCoroutine{Active}@1e4a7dd4, state: SUSPENDED\n" + @@ -35,6 +35,7 @@ class CoroutinesDumpTest : DebugTestBase() { "\tat kotlinx.coroutines.CoroutineStart.invoke(CoroutineStart.kt)\n", ignoredCoroutine = "BlockingCoroutine" ) { + singleThreadedDispatcher.close() deferred.cancel() coroutineThread!!.interrupt() } @@ -181,7 +182,7 @@ class CoroutinesDumpTest : DebugTestBase() { private suspend fun nestedActiveMethod(shouldSuspend: Boolean) { if (shouldSuspend) yield() notifyCoroutineStarted() - while (coroutineContext[Job]!!.isActive) { + while (currentCoroutineContext()[Job]!!.isActive) { try { Thread.sleep(60_000) } catch (_ : InterruptedException) { @@ -189,14 +190,20 @@ class CoroutinesDumpTest : DebugTestBase() { } } - private suspend fun sleepingOuterMethod() { - sleepingNestedMethod() + private suspend fun sleepingOuterMethod(currentDispatcher: CoroutineDispatcher, latch: CompletableDeferred) { + sleepingNestedMethod(currentDispatcher, latch) yield() // TCE } - private suspend fun sleepingNestedMethod() { + private suspend fun sleepingNestedMethod(currentDispatcher: CoroutineDispatcher, latch: CompletableDeferred) { yield() // Suspension point - notifyCoroutineStarted() + /* Schedule a computation on the current single-threaded dispatcher. + Since that thread is currently running this code, + the start notification will happen *after* the currently running coroutine suspends. */ + currentDispatcher.dispatch(currentDispatcher) { + coroutineThread = Thread.currentThread() + latch.complete(Unit) + } delay(Long.MAX_VALUE) } From 3c44b9ff11d9856272d5c7f630eb69160c98951c Mon Sep 17 00:00:00 2001 From: Dmitry Khalanskiy Date: Mon, 15 Dec 2025 12:09:36 +0100 Subject: [PATCH 2/3] Clarify the yield() calls --- kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt b/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt index b795f52470..a6e3ccef1a 100644 --- a/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt +++ b/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt @@ -2,7 +2,6 @@ package kotlinx.coroutines.debug import kotlinx.coroutines.testing.* import kotlinx.coroutines.* -import kotlin.coroutines.* import kotlin.test.* class CoroutinesDumpTest : DebugTestBase() { @@ -192,11 +191,10 @@ class CoroutinesDumpTest : DebugTestBase() { private suspend fun sleepingOuterMethod(currentDispatcher: CoroutineDispatcher, latch: CompletableDeferred) { sleepingNestedMethod(currentDispatcher, latch) - yield() // TCE + yield() // TCE: make sure `sleepingOuterMethod` is contained in the continuation of `sleepingNestedMethod` } private suspend fun sleepingNestedMethod(currentDispatcher: CoroutineDispatcher, latch: CompletableDeferred) { - yield() // Suspension point /* Schedule a computation on the current single-threaded dispatcher. Since that thread is currently running this code, the start notification will happen *after* the currently running coroutine suspends. */ @@ -205,6 +203,7 @@ class CoroutinesDumpTest : DebugTestBase() { latch.complete(Unit) } delay(Long.MAX_VALUE) + yield() // TCE: make sure `sleepingNestedMethod` is contained in the continuation of `delay` } private fun awaitCoroutine() = synchronized(monitor) { From e91c3bd13d177238e54964077591761b92d91cc6 Mon Sep 17 00:00:00 2001 From: Dmitry Khalanskiy Date: Wed, 17 Dec 2025 09:51:39 +0100 Subject: [PATCH 3/3] Clarify that awaitCoroutine() is not establishing happens-before --- kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt b/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt index a6e3ccef1a..6f91fadde2 100644 --- a/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt +++ b/kotlinx-coroutines-debug/test/CoroutinesDumpTest.kt @@ -209,7 +209,11 @@ class CoroutinesDumpTest : DebugTestBase() { private fun awaitCoroutine() = synchronized(monitor) { while (coroutineThread == null) (monitor as Object).wait() while (coroutineThread!!.state != Thread.State.TIMED_WAITING) { - // Wait until thread sleeps to have a consistent stacktrace + /* Wait until the thread we're awaiting goes to sleep. + Note: this does not establish a happens-before relationship with the thread entering `Thread.sleep`. + There still doesn't seem to be any guarantee that the stacktrace will be accurately updated. + says that how accurate + a stacktrace is is implementation-dependent. */ } }