Commit 8cc81c7

test(core): add test for response_metadata in streamEvents (#9589)
Co-authored-by: Christian Bromann <[email protected]>
1 parent 5082921 commit 8cc81c7

File tree

3 files changed (+68, -4 lines):

  .changeset/ninety-penguins-lie.md
  libs/langchain-core/src/runnables/tests/runnable_stream_events_v2.test.ts
  libs/langchain-core/src/utils/testing/chat_models.ts

.changeset/ninety-penguins-lie.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"@langchain/core": patch
+---
+
+test(core): add test for response_metadata in streamEvents

libs/langchain-core/src/runnables/tests/runnable_stream_events_v2.test.ts

Lines changed: 36 additions & 0 deletions
@@ -136,6 +136,42 @@ test("Runnable streamEvents method on a chat model", async () => {
   ]);
 });
 
+test("Runnable streamEvents should preserve response_metadata from generationInfo", async () => {
+  // Test for issue #8470: streamEvents doesn't return response_metadata
+  // This verifies that generationInfo (which contains finish_reason, usage, etc.)
+  // is properly merged into response_metadata and surfaced in stream events
+  const model = new FakeListChatModel({
+    responses: ["abc"],
+    generationInfo: {
+      finish_reason: "stop",
+      model_name: "test-model",
+      usage: { prompt_tokens: 10, completion_tokens: 5 },
+    },
+  });
+
+  const events = [];
+  const eventStream = await model.streamEvents("hello", { version: "v2" });
+  for await (const event of eventStream) {
+    events.push(event);
+  }
+
+  // Find the on_chat_model_end event
+  const endEvent = events.find(
+    (e: { event: string }) => e.event === "on_chat_model_end"
+  );
+  expect(endEvent).toBeDefined();
+
+  // Verify response_metadata contains the generationInfo data
+  const output = (endEvent as { data: { output: AIMessageChunk } }).data.output;
+  expect(output.response_metadata).toBeDefined();
+  expect(output.response_metadata.finish_reason).toBe("stop");
+  expect(output.response_metadata.model_name).toBe("test-model");
+  expect(output.response_metadata.usage).toEqual({
+    prompt_tokens: 10,
+    completion_tokens: 5,
+  });
+});
+
 test("Runnable streamEvents call nested in another runnable + passed callbacks should still work", async () => {
   AsyncLocalStorageProviderSingleton.initializeGlobalInstance(
     new AsyncLocalStorage()
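
For reference, a sketch of the v2 event shape the new test relies on, inferred from the assertions above rather than from the full StreamEvent type in @langchain/core:

import { AIMessageChunk } from "@langchain/core/messages";

// Sketch only: the end event carries the aggregated message on data.output,
// and generationInfo is expected to appear merged into its response_metadata.
type ChatModelEndEvent = {
  event: "on_chat_model_end";
  data: { output: AIMessageChunk };
};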

libs/langchain-core/src/utils/testing/chat_models.ts

Lines changed: 27 additions & 4 deletions
@@ -284,6 +284,13 @@ export interface FakeChatInput extends BaseChatModelParams {
   sleep?: number;
 
   emitCustomEvent?: boolean;
+
+  /**
+   * Generation info to include on the last chunk during streaming.
+   * This gets merged into response_metadata by the base chat model.
+   * Useful for testing response_metadata propagation (e.g., finish_reason).
+   */
+  generationInfo?: Record<string, unknown>;
 }
 
 export interface FakeListChatModelCallOptions extends BaseChatModelCallOptions {
@@ -325,12 +332,15 @@ export class FakeListChatModel extends BaseChatModel<FakeListChatModelCallOption
 
   emitCustomEvent = false;
 
+  generationInfo?: Record<string, unknown>;
+
   constructor(params: FakeChatInput) {
     super(params);
-    const { responses, sleep, emitCustomEvent } = params;
+    const { responses, sleep, emitCustomEvent, generationInfo } = params;
     this.responses = responses;
     this.sleep = sleep;
     this.emitCustomEvent = emitCustomEvent ?? this.emitCustomEvent;
+    this.generationInfo = generationInfo;
   }
 
   _combineLLMOutput() {
@@ -391,12 +401,20 @@ export class FakeListChatModel extends BaseChatModel<FakeListChatModelCallOption
       });
     }
 
-    for await (const text of response) {
+    const responseChars = [...response];
+    for (let i = 0; i < responseChars.length; i++) {
+      const text = responseChars[i];
+      const isLastChunk = i === responseChars.length - 1;
       await this._sleepIfRequested();
       if (options?.thrownErrorString) {
        throw new Error(options.thrownErrorString);
       }
-      const chunk = this._createResponseChunk(text);
+      // Include generationInfo on the last chunk (like real providers do)
+      // This gets merged into response_metadata by the base chat model
+      const chunk = this._createResponseChunk(
+        text,
+        isLastChunk ? this.generationInfo : undefined
+      );
       yield chunk;
       // eslint-disable-next-line no-void
       void runManager?.handleLLMNewToken(text);
@@ -415,10 +433,15 @@ export class FakeListChatModel extends BaseChatModel<FakeListChatModelCallOption
     });
   }
 
-  _createResponseChunk(text: string): ChatGenerationChunk {
+  _createResponseChunk(
+    text: string,
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    generationInfo?: Record<string, any>
+  ): ChatGenerationChunk {
     return new ChatGenerationChunk({
       message: new AIMessageChunk({ content: text }),
       text,
+      generationInfo,
     });
   }
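
A minimal usage sketch of the new generationInfo option outside the test file; the import path is assumed from @langchain/core's public testing exports, and the merge into response_metadata follows the behavior described in the comments above:

import { FakeListChatModel } from "@langchain/core/utils/testing";

// Fake model whose final streamed chunk carries provider-style generation info.
const model = new FakeListChatModel({
  responses: ["abc"],
  generationInfo: { finish_reason: "stop", model_name: "test-model" },
});

// With streamEvents v2, the merged metadata should surface on the end event.
for await (const event of model.streamEvents("hello", { version: "v2" })) {
  if (event.event === "on_chat_model_end") {
    // Expected to include finish_reason and model_name, per the test above.
    console.log(event.data.output.response_metadata);
  }
}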
