Skip to content

Commit ee8ea3d

Browse files
fix: increase max_tokens from 5 to 16 for specific models such as gpt-5-codex in the ping test; add PING_MAX_TOKENS configuration for model-validation ping requests (#235)
1 parent 60e8059 commit ee8ea3d

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

python/dify_plugin/config/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ class DifyPluginEnv(BaseSettings):
4040

4141
DIFY_PLUGIN_DAEMON_URL: str = Field(default="http://localhost:5002", description="backwards invocation address")
4242

43+
PING_MAX_TOKENS: int = Field(
44+
default=16,
45+
description="Maximum tokens for model validation ping request. Some providers require at least 16 tokens.",
46+
)
47+
4348
model_config = SettingsConfigDict(
4449
# read from dotenv format config file
4550
env_file=".env",

python/dify_plugin/interfaces/model/openai_compatible/llm.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,10 @@ def validate_credentials(self, model: str, credentials: dict) -> None:
172172
:return:
173173
"""
174174
try:
175+
# Load ping max_tokens configuration from environment variable
176+
config = DifyPluginEnv()
177+
ping_max_tokens = config.PING_MAX_TOKENS
178+
175179
headers = {"Content-Type": "application/json"}
176180

177181
api_key = credentials.get("api_key")
@@ -183,7 +187,7 @@ def validate_credentials(self, model: str, credentials: dict) -> None:
183187
endpoint_url += "/"
184188

185189
# prepare the payload for a simple ping to the model
186-
data = {"model": credentials.get("endpoint_model_name", model), "max_tokens": 5}
190+
data = {"model": credentials.get("endpoint_model_name", model), "max_tokens": ping_max_tokens}
187191

188192
completion_type = LLMMode.value_of(credentials["mode"])
189193

@@ -202,7 +206,7 @@ def validate_credentials(self, model: str, credentials: dict) -> None:
202206
stream_mode_auth = credentials.get("stream_mode_auth", "not_use")
203207
if stream_mode_auth == "use":
204208
data["stream"] = True
205-
data["max_tokens"] = 10
209+
data["max_tokens"] = ping_max_tokens
206210
response = requests.post(endpoint_url, headers=headers, json=data, timeout=(10, 300), stream=True)
207211
if response.status_code != 200:
208212
raise CredentialsValidateFailedError(

0 commit comments

Comments (0)