
Commit c87874c

vasilisazayka, mbrnersap, and karimmohraz authored
[New provider] Sap gen ai hub (#16053)
* add sap gen ai hub
* add async tests
* add async and streaming support
* add embedding model support
* add embedding support
* remove unused import
* fix structured output
* clean-up
* remove timeout and add tool support
* remove unused code
* fix(sap): improve streaming robustness; restore embed URL builder compatibility
  - sap/embed/transformation: add api_key and litellm_params to get_complete_url to align with core flow and prevent failures
  - sap/chat/handler: wrap async/sync streaming iterators to safely handle Stop(Async)Iteration and errors
  - sap/chat/transformation: remove unused imports and dead code
* fix(sap): linter fix
* fix(sap): made gen_ai_hub optional: import check + OptionalDependencyError with install hint if missing
* test(sap): add chat/stream/async tests and OptionalDependencyError check
* Fix tool call handling in SAP GenAI Hub transformation; add sap models to model_prices_and_context_window.json and model_prices_and_context_window_backup.json
* fix(sap): delete unnecessary code, linter fix
* fix(sap): refactor chat transformation; add support for list and dict content
* fix(sap): fix tests
* fix(sap): fix lint
* Update transformation.py
* fix(sap): fix model description and fix after rebase
* change(sap): HTTP calls in chat handler, response transformation, and auth handling without the SAP SDK
* change(sap): switch to v2 (chat handler, chat transformation), code clean-up
* add deployment discovery and improved credentials handling
* change(sap): fix sync stream
* fix(sap): fix response format
* fix(sap): switch embedding to v2 and HTTP request; reimplement stream creator; improve request transformation
* fix async streaming
* fix(sap): linters, transformation models, remove sap dependency test
* fix(sap): code clean-up
* add unit test for sap chat completion
* linters fix
* move token, rg and base_url to properties
* (sap): add embedding unit test
* fix(sap): bypass response format for some models
* fix(sap): fix chat transformation and list of supported params
* fix(sap): fix lint
* add sap service key module parameter
* fix(sap): remove unused code
* fix(sap): remove prices
* add service key support
* fix(sap): add message content validations; change get_supported_openai_params in chat transformation
* fix typo in mock
* fix(sap): fix supported params map
* fix(sap): fix message content validation
* fix(sap): use litellm client for credentials
* fix(sap): use built-in custom_http_client; move credentials handling to transformation
* fix(sap): handle stream_options
* fix(sap): fix tests
* fix(sap): code clean-up, linter fix
* skip other authentication options when creds are provided
* fix local variable

Signed-off-by: Vasilisa Parshikova <[email protected]>
Co-authored-by: Mathis Boerner <[email protected]>
Co-authored-by: karimmohraz <[email protected]>
Co-authored-by: Karim <[email protected]>
1 parent 958c190 commit c87874c
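
The net effect: models prefixed with "sap/" route to the new provider through LiteLLM's standard entry points. A minimal usage sketch follows; it is hypothetical in that the model name and service-key value are illustrative, while the "sap/" prefix, the litellm.sap_service_key attribute, and the chat/embedding configs come from this commit.

import litellm

# Assumption: the service key is the JSON credential issued for SAP AI Core;
# this commit only adds the litellm.sap_service_key attribute, not its format.
litellm.sap_service_key = "<sap-service-key-json>"

# The "sap/" prefix resolves custom_llm_provider="sap"
# (see get_llm_provider_logic.py below); the model name is illustrative.
response = litellm.completion(
    model="sap/gpt-4o",
    messages=[{"role": "user", "content": "Hello from GenAI Hub"}],
)
print(response.choices[0].message.content)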

File tree

16 files changed, +3011 −8 lines changed

litellm/__init__.py

Lines changed: 9 additions & 6 deletions
(Note: several hunks below are trailing-whitespace-only cleanups; their removed and added lines read identically.)

@@ -265,6 +265,7 @@
 cometapi_key: Optional[str] = None
 ovhcloud_key: Optional[str] = None
 lemonade_key: Optional[str] = None
+sap_service_key: Optional[str] = None
 amazon_nova_api_key: Optional[str] = None
 common_cloud_provider_auth_params: dict = {
     "params": ["project", "region_name", "token"],

@@ -1069,7 +1070,7 @@ def add_known_models():
 from litellm.litellm_core_utils.token_counter import get_modified_max_tokens
 # client must be imported immediately as it's used as a decorator at function definition time
 from .utils import client
-# Note: Most other utils imports are lazy-loaded via __getattr__ to avoid loading utils.py
+# Note: Most other utils imports are lazy-loaded via __getattr__ to avoid loading utils.py
 # (which imports tiktoken) at import time

 from .llms.bytez.chat.transformation import BytezChatConfig

@@ -1241,6 +1242,7 @@ def add_known_models():
 from .llms.topaz.image_variations.transformation import TopazImageVariationConfig
 from litellm.llms.openai.completion.transformation import OpenAITextCompletionConfig
 from .llms.groq.chat.transformation import GroqChatConfig
+from .llms.sap.chat.transformation import GenAIHubOrchestrationConfig
 from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
 from .llms.voyage.embedding.transformation_contextual import (
     VoyageContextualEmbeddingConfig,

@@ -1339,6 +1341,7 @@ def add_known_models():
 from .llms.watsonx.completion.transformation import IBMWatsonXAIConfig
 from .llms.watsonx.chat.transformation import IBMWatsonXChatConfig
 from .llms.watsonx.embed.transformation import IBMWatsonXEmbeddingConfig
+from .llms.sap.embed.transformation import GenAIHubEmbeddingConfig
 from .llms.watsonx.audio_transcription.transformation import (
     IBMWatsonXAudioTranscriptionConfig,
 )

@@ -1511,13 +1514,13 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
 
 if TYPE_CHECKING:
     from litellm.types.utils import ModelInfo as _ModelInfoType
-
+
     # Cost calculator functions
     cost_per_token: Callable[..., Tuple[float, float]]
     completion_cost: Callable[..., float]
     response_cost_calculator: Any
     modify_integration: Any
-
+
     # Utils functions - type stubs for truly lazy loaded functions only
     # (functions NOT imported via "from .main import *")
     get_response_string: Callable[..., str]

@@ -1547,7 +1550,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
     get_first_chars_messages: Callable[..., str]
     get_provider_fields: Callable[..., List]
     get_valid_models: Callable[..., list]
-
+
     # Response types - truly lazy loaded only (not in main.py or elsewhere)
     ModelResponseListIterator: Type[Any]
 

@@ -1563,7 +1566,7 @@ def __getattr__(name: str) -> Any:
     if name in _cost_calculator_names:
         from ._lazy_imports import _lazy_import_cost_calculator
         return _lazy_import_cost_calculator(name)
-
+
     # Lazy load litellm_logging functions
     _litellm_logging_names = (
         "Logging",

@@ -1572,7 +1575,7 @@ def __getattr__(name: str) -> Any:
     if name in _litellm_logging_names:
         from ._lazy_imports import _lazy_import_litellm_logging
         return _lazy_import_litellm_logging(name)
-
+
     # Lazy load utils functions
     _utils_names = (
         "exception_type", "get_optional_params", "get_response_string", "token_counter",

litellm/litellm_core_utils/get_llm_provider_logic.py

Lines changed: 2 additions & 0 deletions
@@ -406,6 +406,8 @@ def get_llm_provider(  # noqa: PLR0915
             custom_llm_provider = "clarifai"
         elif model.startswith("amazon_nova"):
             custom_llm_provider = "amazon_nova"
+        elif model.startswith("sap/"):
+            custom_llm_provider = "sap"
         if not custom_llm_provider:
             if litellm.suppress_debug_info is False:
                 print()  # noqa
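
A quick sketch of the new routing (the model name is illustrative; the function and its 4-tuple return value are existing litellm behavior):

from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider

# Models starting with "sap/" now resolve to the "sap" provider; get_llm_provider
# returns (model, custom_llm_provider, dynamic_api_key, api_base).
model, provider, _, _ = get_llm_provider(model="sap/gpt-4o")
assert provider == "sap"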

litellm/litellm_core_utils/get_supported_openai_params.py

Lines changed: 5 additions & 0 deletions
@@ -116,6 +116,11 @@ def get_supported_openai_params(  # noqa: PLR0915
                 f"Unsupported provider config: {transcription_provider_config} for model: {model}"
             )
         return litellm.OpenAIConfig().get_supported_openai_params(model=model)
+    elif custom_llm_provider == "sap":
+        if request_type == "chat_completion":
+            return litellm.GenAIHubOrchestrationConfig().get_supported_openai_params(model=model)
+        elif request_type == "embeddings":
+            return litellm.GenAIHubEmbeddingConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "azure":
         if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
             return litellm.AzureOpenAIO1Config().get_supported_openai_params(
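
The two request types dispatch to the chat and embedding configs imported in __init__.py above. A sketch of querying them through the existing litellm helper (model names illustrative):

from litellm import get_supported_openai_params

# request_type selects GenAIHubOrchestrationConfig vs. GenAIHubEmbeddingConfig
chat_params = get_supported_openai_params(
    model="sap/gpt-4o", custom_llm_provider="sap", request_type="chat_completion"
)
embedding_params = get_supported_openai_params(
    model="sap/text-embedding-3-small", custom_llm_provider="sap", request_type="embeddings"
)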

litellm/litellm_core_utils/streaming_handler.py

Lines changed: 0 additions & 1 deletion
@@ -441,7 +441,6 @@ def handle_openai_chat_completion_chunk(self, chunk):
         finish_reason = None
         logprobs = None
         usage = None
-
         if str_line and str_line.choices and len(str_line.choices) > 0:
             if (
                 str_line.choices[0].delta is not None

litellm/llms/sap/chat/__init__.py

Lines changed: 1 addition & 0 deletions
(New file: the package __init__.py contains a single blank line.)

@@ -0,0 +1 @@
+

litellm/llms/sap/chat/handler.py

Lines changed: 262 additions & 0 deletions
from __future__ import annotations

import json
import time
from typing import AsyncIterator, Iterator, Optional

import httpx

from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.types.llms.openai import OpenAIChatCompletionChunk

from ...custom_httpx.llm_http_handler import BaseLLMHTTPHandler


# -------------------------------
# Errors
# -------------------------------
class GenAIHubOrchestrationError(Exception):
    def __init__(self, status_code: int, message: str):
        super().__init__(message)
        self.status_code = status_code
        self.message = message


# -------------------------------
# Stream parsing helpers
# -------------------------------
def _now_ts() -> int:
    return int(time.time())


def _is_terminal_chunk(chunk: OpenAIChatCompletionChunk) -> bool:
    """OpenAI-shaped chunk is terminal if any choice has a non-None finish_reason."""
    try:
        for ch in chunk.choices or []:
            if ch.finish_reason is not None:
                return True
    except Exception:
        pass
    return False


class _StreamParser:
    """Normalize orchestration streaming events into OpenAI-like chunks."""

    @staticmethod
    def _from_orchestration_result(evt: dict) -> Optional[OpenAIChatCompletionChunk]:
        """Accepts the orchestration_result shape and maps it to an OpenAI-like *chunk*."""
        orc = evt.get("orchestration_result") or {}
        if not orc:
            return None

        return OpenAIChatCompletionChunk.model_validate(
            {
                "id": orc.get("id") or evt.get("request_id") or "stream-chunk",
                "object": orc.get("object") or "chat.completion.chunk",
                "created": orc.get("created") or evt.get("created") or _now_ts(),
                "model": orc.get("model") or "unknown",
                "choices": [
                    {
                        "index": c.get("index", 0),
                        "delta": c.get("delta") or {},
                        "finish_reason": c.get("finish_reason"),
                    }
                    for c in (orc.get("choices") or [])
                ],
            }
        )

    @staticmethod
    def to_openai_chunk(event_obj: dict) -> Optional[OpenAIChatCompletionChunk]:
        """
        Accepts:
          - {"final_result": <openai-style CHUNK>}  (IMPORTANT: just another chunk, NOT terminal)
          - {"orchestration_result": {...}}         (mapped to a chunk)
          - already-OpenAI-shaped chunks
          - other events (ignored)
        Raises:
          - ValueError for in-stream error objects
        """
        # In-stream error per spec (surface as exception)
        if "code" in event_obj or "error" in event_obj:
            raise ValueError(json.dumps(event_obj))

        # FINAL RESULT IS *NOT* TERMINAL: treat it as the next chunk
        if "final_result" in event_obj:
            fr = event_obj["final_result"] or {}
            # ensure it looks like an OpenAI chunk
            if "object" not in fr:
                fr["object"] = "chat.completion.chunk"
            return OpenAIChatCompletionChunk.model_validate(fr)

        # Orchestration incremental delta
        if "orchestration_result" in event_obj:
            return _StreamParser._from_orchestration_result(event_obj)

        # Already an OpenAI-like chunk
        if "choices" in event_obj and "object" in event_obj:
            return OpenAIChatCompletionChunk.model_validate(event_obj)

        # Unknown / heartbeat / metrics
        return None


# -------------------------------
# Iterators
# -------------------------------
class SAPStreamIterator:
    """
    Sync iterator over an httpx streaming response that yields OpenAIChatCompletionChunk.
    Accepts both SSE `data: ...` and raw JSON lines. Closes on terminal chunk or [DONE].
    """

    def __init__(
        self,
        response: Iterator,
        event_prefix: str = "data: ",
        final_msg: str = "[DONE]",
    ):
        self._resp = response
        self._iter = response
        self._prefix = event_prefix
        self._final = final_msg
        self._done = False

    def __iter__(self) -> Iterator[OpenAIChatCompletionChunk]:
        return self

    def __next__(self) -> OpenAIChatCompletionChunk:
        if self._done:
            raise StopIteration

        for raw in self._iter:
            line = (raw or "").strip()
            if not line:
                continue

            payload = (
                line[len(self._prefix):] if line.startswith(self._prefix) else line
            )
            if payload == self._final:
                self._safe_close()
                raise StopIteration

            try:
                obj = json.loads(payload)
            except Exception:
                continue

            try:
                chunk = _StreamParser.to_openai_chunk(obj)
            except ValueError as e:
                self._safe_close()
                raise e

            if chunk is None:
                continue

            # Close on terminal
            if _is_terminal_chunk(chunk):
                self._safe_close()

            return chunk

        self._safe_close()
        raise StopIteration

    def _safe_close(self) -> None:
        if self._done:
            return
        self._done = True


class AsyncSAPStreamIterator:
    sync_stream = False

    def __init__(
        self,
        response: AsyncIterator,
        event_prefix: str = "data: ",
        final_msg: str = "[DONE]",
    ):
        self._resp = response
        self._prefix = event_prefix
        self._final = final_msg
        self._line_iter = None
        self._done = False

    def __aiter__(self):
        return self

    async def __anext__(self):
        if self._done:
            raise StopAsyncIteration

        if self._line_iter is None:
            self._line_iter = self._resp

        while True:
            try:
                raw = await self._line_iter.__anext__()
            except (StopAsyncIteration, httpx.ReadError, OSError):
                await self._aclose()
                raise StopAsyncIteration

            line = (raw or "").strip()
            if not line:
                continue

            payload = (
                line[len(self._prefix):] if line.startswith(self._prefix) else line
            )
            if payload == self._final:
                await self._aclose()
                raise StopAsyncIteration

            try:
                obj = json.loads(payload)
            except Exception:
                continue

            try:
                chunk = _StreamParser.to_openai_chunk(obj)
            except ValueError as e:
                await self._aclose()
                raise GenAIHubOrchestrationError(502, str(e))

            if chunk is None:
                continue

            # If terminal, close BEFORE returning. Next __anext__() will stop immediately.
            if any(c.finish_reason is not None for c in (chunk.choices or [])):
                await self._aclose()

            return chunk

    async def _aclose(self):
        if self._done:
            return
        self._done = True


# -------------------------------
# LLM handler
# -------------------------------
class GenAIHubOrchestration(BaseLLMHTTPHandler):
    def _add_stream_param_to_request_body(
        self,
        data: dict,
        provider_config: BaseConfig,
        fake_stream: bool,
    ):
        if data.get("config", {}).get("stream", None) is not None:
            data["config"]["stream"]["enabled"] = True
        else:
            data["config"]["stream"] = {"enabled": True}
        return data
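
Two behaviors worth calling out: the override above injects streaming as {"config": {"stream": {"enabled": true}}} into the orchestration request body rather than an OpenAI-style top-level "stream": true, and the iterators normalize several event shapes into OpenAI chunks. A small self-contained sketch of the latter, driving SAPStreamIterator with fake SSE lines (the event payloads and model name are illustrative; the orchestration_result shape and the "data: "/"[DONE]" framing come from the code above):

import json

from litellm.llms.sap.chat.handler import SAPStreamIterator

# Two illustrative orchestration events followed by the [DONE] sentinel.
fake_sse_lines = [
    "data: " + json.dumps({
        "request_id": "req-1",
        "orchestration_result": {
            "model": "gpt-4o",
            "choices": [{"index": 0, "delta": {"content": "Hel"}, "finish_reason": None}],
        },
    }),
    "data: " + json.dumps({
        "request_id": "req-1",
        "orchestration_result": {
            "model": "gpt-4o",
            "choices": [{"index": 0, "delta": {"content": "lo"}, "finish_reason": "stop"}],
        },
    }),
    "data: [DONE]",
]

# Each event is normalized into an OpenAIChatCompletionChunk; the iterator
# closes itself on a non-None finish_reason or on the [DONE] line.
for chunk in SAPStreamIterator(iter(fake_sse_lines)):
    print(chunk.choices[0].delta)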
