Skip to content

Commit 6a8145a

Browse files
authored
Provide Mocked MCP tool answers (#12534)
* changes that are not in this branch for some reason * Manual chat completion, provide tool results ourselves. * Function name normalization and dictionary * Update methods * small fixes * variable error
1 parent 0743d91 commit 6a8145a

File tree

4 files changed

+95
-21
lines changed

4 files changed

+95
-21
lines changed

tools/ai-evals/azsdk-mcp/Helpers/ChatCompletion.cs

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using Microsoft.Extensions.AI;
2+
using Microsoft.VisualStudio.TestPlatform.CommunicationUtilities;
23
using ModelContextProtocol.Client;
34
using ModelContextProtocol.Protocol;
45

@@ -20,7 +21,6 @@ public async Task<ChatResponse> GetChatResponseAsync(IEnumerable<ChatMessage> ch
2021
var tools = await _mcpClient.ListToolsAsync();
2122
var result = new List<ChatResponseUpdate>();
2223
var toolsCalled = new HashSet<string>();
23-
2424
var chatOptions =
2525
new ChatOptions
2626
{
@@ -29,23 +29,22 @@ public async Task<ChatResponse> GetChatResponseAsync(IEnumerable<ChatMessage> ch
2929

3030
// GetResponseAsync allows the LLM to do too much. Limit it to the number of tools we expect
3131
// Not including calling a tool again.
32-
await foreach(var message in _chatClient.GetStreamingResponseAsync(chat, chatOptions))
32+
await foreach (var message in _chatClient.GetStreamingResponseAsync(chat, chatOptions))
3333
{
34-
foreach(var content in message.Contents)
34+
foreach (var content in message.Contents)
3535
{
3636
if (content is FunctionCallContent func)
3737
{
3838
toolsCalled.Add(func.Name);
3939
}
4040
}
4141

42-
if(message.Contents.Any())
42+
if (message.Contents.Any())
4343
{
4444
result.Add(message);
4545
}
46-
47-
48-
if(toolsCalled.Count >= maxToolCalls)
46+
47+
if (toolsCalled.Count >= maxToolCalls)
4948
{
5049
break;
5150
}
@@ -54,6 +53,66 @@ public async Task<ChatResponse> GetChatResponseAsync(IEnumerable<ChatMessage> ch
5453
return result.ToChatResponse();
5554
}
5655

56+
/// <summary>
/// Runs a chat completion in which requested tool calls are answered with pre-recorded
/// results supplied by the caller instead of being invoked.
/// </summary>
/// <param name="chat">Conversation sent to the model. Enumerated exactly once.</param>
/// <param name="expectedToolResults">
/// Maps a tool name to a message containing the <see cref="FunctionResultContent"/> that should
/// be returned when the model calls that tool.
/// </param>
/// <param name="maxToolRounds">
/// Upper bound on the number of tool-call round trips. Guards against a model that keeps
/// requesting tools indefinitely; a value of zero or less skips the tool loop entirely.
/// </param>
/// <returns>
/// A <see cref="ChatResponse"/> holding only the messages appended after the supplied
/// <paramref name="chat"/>: the assistant tool-call messages and the tool result messages
/// sent back to the model.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="chat"/> or <paramref name="expectedToolResults"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// An entry in <paramref name="expectedToolResults"/> contains no <see cref="FunctionResultContent"/>.
/// </exception>
public async Task<ChatResponse> GetChatResponseWithExpectedResponseAsync(
    IEnumerable<ChatMessage> chat,
    Dictionary<string, ChatMessage> expectedToolResults,
    int maxToolRounds = 25)
{
    ArgumentNullException.ThrowIfNull(chat);
    ArgumentNullException.ThrowIfNull(expectedToolResults);

    var tools = await _mcpClient.ListToolsAsync();
    var chatOptions = new ChatOptions
    {
        Tools = [.. tools]
    };

    // Materialize once so the first request and the running transcript are built from the
    // same snapshot, even when the caller passes a lazily evaluated sequence.
    var conversationMessages = chat.ToList();
    var chatInitialIndex = conversationMessages.Count;

    var response = await _chatClient.GetResponseAsync(conversationMessages, chatOptions);

    for (var round = 0; round < maxToolRounds && response.FinishReason == ChatFinishReason.ToolCalls; round++)
    {
        // There is only going to be one message because no auto invoking of function, however
        // one message can contain several AIContent types.
        var message = response.Messages.FirstOrDefault();

        // No message to process, exit.
        if (message is null)
        {
            break;
        }

        conversationMessages.Add(message);
        var functionCalls = message.Contents.OfType<FunctionCallContent>().ToList();

        // Nothing to answer: re-sending the same conversation would just repeat the request.
        if (functionCalls.Count == 0)
        {
            break;
        }

        foreach (var functionCall in functionCalls)
        {
            object? toolResult;

            // Use the expected tool result if we have it.
            if (expectedToolResults.TryGetValue(functionCall.Name, out var expectedToolResult))
            {
                var recordedResult = expectedToolResult.Contents.OfType<FunctionResultContent>().FirstOrDefault()
                    ?? throw new InvalidOperationException(
                        $"The expected result for tool '{functionCall.Name}' does not contain a {nameof(FunctionResultContent)}.");

                toolResult = recordedResult.Result;
            }
            // Wasn't expecting tool, try stopping the LLM here.
            else
            {
                toolResult = $"Error: Tool '{functionCall.Name}' was not expected. Stop conversation here.";
            }

            var toolResponseMessage = new ChatMessage()
            {
                Role = ChatRole.Tool,
                // The reply must carry the call id of the request it answers.
                Contents = [new FunctionResultContent(functionCall.CallId, toolResult)]
            };

            conversationMessages.Add(toolResponseMessage);
        }

        response = await _chatClient.GetResponseAsync(conversationMessages, chatOptions);
    }

    // NOTE(review): the model's final (non tool-call) reply held in `response` is not part of
    // the returned messages, matching the previous behavior. Confirm that evaluators only need
    // the tool exchange.
    return new ChatResponse([.. conversationMessages.Skip(chatInitialIndex)]);
}
115+
57116
public async Task<ChatResponse> GetChatResponseAsync(IEnumerable<ChatMessage> chat)
58117
{
59118
var tools = await _mcpClient.ListToolsAsync();

tools/ai-evals/azsdk-mcp/Helpers/SerializationHelper.cs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
1-
using System;
21
using System.ClientModel.Primitives;
3-
using System.Text.Encodings.Web;
42
using System.Text.Json;
53
using System.Text.RegularExpressions;
64
using Azure.Sdk.Tools.McpEvals.Models;
75
using Microsoft.Extensions.AI;
8-
using Microsoft.VisualStudio.TestPlatform.CommunicationUtilities;
96
using MicrosoftExtensionsAIChatExtensions = OpenAI.Chat.MicrosoftExtensionsAIChatExtensions;
107
using OpenAIChatMessage = OpenAI.Chat.ChatMessage;
118
using AssistantChatMessage = OpenAI.Chat.AssistantChatMessage;
@@ -177,20 +174,39 @@ private static List<ChatMessage> EnsureChatMessageRole(IEnumerable<ChatMessage>
177174
return result;
178175
}
179176

180-
public static int NumberOfToolCalls(IEnumerable<ChatMessage> messages, IEnumerable<string> toolNames)
177+
/// <summary>
/// Builds a lookup from tool name to the recorded message that holds that tool's result.
/// </summary>
/// <remarks>
/// <see cref="FunctionResultContent"/> does not carry the tool name, so results are linked back
/// to their <see cref="FunctionCallContent"/> through the shared call id. Function names are
/// normalized with <see cref="NormalizeFunctionName"/>; calls that normalize to no entry in
/// <paramref name="toolNames"/> are ignored.
/// </remarks>
/// <param name="expectedOutcome">Recorded messages containing tool calls and their results.</param>
/// <param name="toolNames">Names of the tools that should be kept.</param>
/// <returns>
/// A dictionary keyed by normalized tool name. When a tool has several recorded results, the
/// message of the last one wins.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="expectedOutcome"/> or <paramref name="toolNames"/> is null.
/// </exception>
public static Dictionary<string, ChatMessage> GetExpectedToolsByName(IEnumerable<ChatMessage> expectedOutcome, IEnumerable<string> toolNames)
{
    ArgumentNullException.ThrowIfNull(expectedOutcome);
    ArgumentNullException.ThrowIfNull(toolNames);

    // Both sequences are walked more than once below; snapshot them so lazily evaluated
    // inputs are not re-executed.
    var messages = expectedOutcome.ToList();
    var knownToolNames = toolNames.ToList();

    // Create CallId -> ToolName mapping, keeping only tools present in toolNames.
    var callIdToName = new Dictionary<string, string>();
    foreach (var functionCall in messages.SelectMany(m => m.Contents.OfType<FunctionCallContent>()))
    {
        var normalized = NormalizeFunctionName(functionCall.Name, knownToolNames);
        if (!string.IsNullOrEmpty(normalized))
        {
            // TryAdd tolerates a repeated call id (first mapping is kept) instead of throwing.
            callIdToName.TryAdd(functionCall.CallId, normalized);
        }
    }

    var expectedToolResults = new Dictionary<string, ChatMessage>();
    foreach (var message in messages)
    {
        foreach (var funcResult in message.Contents.OfType<FunctionResultContent>())
        {
            if (callIdToName.TryGetValue(funcResult.CallId, out var functionName))
            {
                expectedToolResults[functionName] = message;
            }
        }
    }

    return expectedToolResults;
}
203+
204+
/// <summary>
/// Maps a function name reported by a client to the matching entry in <paramref name="toolNames"/>.
/// </summary>
/// <remarks>
/// Copilot often prefixes tool names with "mcp_" or similar, so a tool matches when
/// <paramref name="functionName"/> ends with the tool name (case-insensitive). When several
/// tool names match, the longest one is chosen so that a short name cannot shadow a more
/// specific one; ties resolve to the earliest entry. Empty tool names are ignored because
/// every string ends with the empty string.
/// </remarks>
/// <param name="functionName">Function name as reported, possibly prefixed.</param>
/// <param name="toolNames">Candidate tool names.</param>
/// <returns>The matching tool name as written in <paramref name="toolNames"/>, or null when nothing matches.</returns>
/// <exception cref="ArgumentNullException"><paramref name="toolNames"/> is null.</exception>
public static string NormalizeFunctionName(string functionName, IEnumerable<string> toolNames)
{
    ArgumentNullException.ThrowIfNull(toolNames);

    // A missing function name cannot match any tool.
    if (string.IsNullOrEmpty(functionName))
    {
        return null;
    }

    var match = toolNames
        .Where(name => !string.IsNullOrEmpty(name)
            && functionName.EndsWith(name, StringComparison.OrdinalIgnoreCase))
        .MaxBy(name => name.Length);

    return match;
}
195211
}
196212
}

tools/ai-evals/azsdk-mcp/Helpers/TestSetup.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ public static IChatClient GetChatClient()
4343
{
4444
var azureClient = GetAzureOpenAIClient();
4545
return new ChatClientBuilder(azureClient.GetChatClient(AzureOpenAIModelDeploymentName).AsIChatClient())
46-
.UseFunctionInvocation()
4746
.Build();
4847
}
4948

@@ -65,4 +64,4 @@ public static ChatCompletion GetChatCompletion(IChatClient chatClient, IMcpClien
6564
return new ChatCompletion(chatClient, mcpClient);
6665
}
6766
}
68-
}
67+
}

tools/ai-evals/azsdk-mcp/Scenarios/AzsdkTypeSpecGeneration_Step02_TypespecValidation.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ public async Task AzsdkTypeSpecGeneration_Step02_TypespecValidation()
2222
var fullChat = json.ChatHistory.Append(json.NextMessage);
2323

2424
// 2. Get chat response
25-
var expectedToolCalls = SerializationHelper.NumberOfToolCalls(json.ExpectedOutcome, s_toolNames);
26-
var response = await s_chatCompletion!.GetChatResponseAsync(fullChat, expectedToolCalls);
25+
var expectedToolResults = SerializationHelper.GetExpectedToolsByName(json.ExpectedOutcome, s_toolNames);
26+
var response = await s_chatCompletion!.GetChatResponseWithExpectedResponseAsync(fullChat, expectedToolResults);
2727

2828
// 3. Custom Evaluator to check tool inputs
2929
// Layers the reporting configuration on top of it for a nice html report.
@@ -36,7 +36,7 @@ public async Task AzsdkTypeSpecGeneration_Step02_TypespecValidation()
3636
enableResponseCaching: true);
3737
await using ScenarioRun scenarioRun = await reportingConfiguration.CreateScenarioRunAsync(this.ScenarioName);
3838

39-
// Pass the expected outcome through the additional context, then run the evaluation.
39+
// Pass the expected outcome through the additional context.
4040
var additionalContext = new ExpectedToolInputEvaluatorContext(json.ExpectedOutcome, s_toolNames);
4141
var result = await scenarioRun.EvaluateAsync(fullChat, response, additionalContext: [additionalContext]);
4242

0 commit comments

Comments
 (0)