BerriAI
diff --git a/‎litellm/__init__.py‎
Lines changed: 9 additions & 6 deletions b/‎litellm/__init__.py‎
Lines changed: 9 additions & 6 deletions
diff --git a/‎litellm/litellm_core_utils/get_llm_provider_logic.py‎
Lines changed: 2 additions & 0 deletions b/‎litellm/litellm_core_utils/get_llm_provider_logic.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎litellm/litellm_core_utils/get_supported_openai_params.py‎
Lines changed: 5 additions & 0 deletions b/‎litellm/litellm_core_utils/get_supported_openai_params.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎litellm/litellm_core_utils/streaming_handler.py‎
Lines changed: 0 additions & 1 deletion b/‎litellm/litellm_core_utils/streaming_handler.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎litellm/llms/sap/chat/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎litellm/llms/sap/chat/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎litellm/llms/sap/chat/handler.py‎
Lines changed: 262 additions & 0 deletions b/‎litellm/llms/sap/chat/handler.py‎
Lines changed: 262 additions & 0 deletions
@@ -265,6 +265,7 @@
 cometapi_key: Optional[str] = None
 ovhcloud_key: Optional[str] = None
 lemonade_key: Optional[str] = None
+sap_service_key: Optional[str] = None
 amazon_nova_api_key: Optional[str] = None
 common_cloud_provider_auth_params: dict = {
     "params": ["project", "region_name", "token"],
@@ -1069,7 +1070,7 @@ def add_known_models():
 from litellm.litellm_core_utils.token_counter import get_modified_max_tokens
 # client must be imported immediately as it's used as a decorator at function definition time
 from .utils import client
-# Note: Most other utils imports are lazy-loaded via __getattr__ to avoid loading utils.py 
+# Note: Most other utils imports are lazy-loaded via __getattr__ to avoid loading utils.py
 # (which imports tiktoken) at import time
 
 from .llms.bytez.chat.transformation import BytezChatConfig
@@ -1241,6 +1242,7 @@ def add_known_models():
 from .llms.topaz.image_variations.transformation import TopazImageVariationConfig
 from litellm.llms.openai.completion.transformation import OpenAITextCompletionConfig
 from .llms.groq.chat.transformation import GroqChatConfig
+from .llms.sap.chat.transformation import GenAIHubOrchestrationConfig
 from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
 from .llms.voyage.embedding.transformation_contextual import (
     VoyageContextualEmbeddingConfig,
@@ -1339,6 +1341,7 @@ def add_known_models():
 from .llms.watsonx.completion.transformation import IBMWatsonXAIConfig
 from .llms.watsonx.chat.transformation import IBMWatsonXChatConfig
 from .llms.watsonx.embed.transformation import IBMWatsonXEmbeddingConfig
+from .llms.sap.embed.transformation import GenAIHubEmbeddingConfig
 from .llms.watsonx.audio_transcription.transformation import (
     IBMWatsonXAudioTranscriptionConfig,
 )
@@ -1511,13 +1514,13 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
 
 if TYPE_CHECKING:
     from litellm.types.utils import ModelInfo as _ModelInfoType
-    
+
     # Cost calculator functions
     cost_per_token: Callable[..., Tuple[float, float]]
     completion_cost: Callable[..., float]
     response_cost_calculator: Any
     modify_integration: Any
-    
+
     # Utils functions - type stubs for truly lazy loaded functions only
     # (functions NOT imported via "from .main import *")
     get_response_string: Callable[..., str]
@@ -1547,7 +1550,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
     get_first_chars_messages: Callable[..., str]
     get_provider_fields: Callable[..., List]
     get_valid_models: Callable[..., list]
-    
+
     # Response types - truly lazy loaded only (not in main.py or elsewhere)
     ModelResponseListIterator: Type[Any]
 
@@ -1563,7 +1566,7 @@ def __getattr__(name: str) -> Any:
     if name in _cost_calculator_names:
         from ._lazy_imports import _lazy_import_cost_calculator
         return _lazy_import_cost_calculator(name)
-    
+
     # Lazy load litellm_logging functions
     _litellm_logging_names = (
         "Logging",
@@ -1572,7 +1575,7 @@ def __getattr__(name: str) -> Any:
     if name in _litellm_logging_names:
         from ._lazy_imports import _lazy_import_litellm_logging
         return _lazy_import_litellm_logging(name)
-    
+
     # Lazy load utils functions
     _utils_names = (
         "exception_type", "get_optional_params", "get_response_string", "token_counter",
 
@@ -406,6 +406,8 @@ def get_llm_provider(  # noqa: PLR0915
             custom_llm_provider = "clarifai"
         elif model.startswith("amazon_nova"):
             custom_llm_provider = "amazon_nova"
+        elif model.startswith("sap/"):
+            custom_llm_provider = "sap"
         if not custom_llm_provider:
             if litellm.suppress_debug_info is False:
                 print()  # noqa
 
@@ -116,6 +116,11 @@ def get_supported_openai_params(  # noqa: PLR0915
                     f"Unsupported provider config: {transcription_provider_config} for model: {model}"
                 )
         return litellm.OpenAIConfig().get_supported_openai_params(model=model)
+    elif custom_llm_provider == "sap":
+        if request_type == "chat_completion":
+            return litellm.GenAIHubOrchestrationConfig().get_supported_openai_params(model=model)
+        elif request_type == "embeddings":
+            return litellm.GenAIHubEmbeddingConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "azure":
         if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
             return litellm.AzureOpenAIO1Config().get_supported_openai_params(
 
@@ -441,7 +441,6 @@ def handle_openai_chat_completion_chunk(self, chunk):
             finish_reason = None
             logprobs = None
             usage = None
-
             if str_line and str_line.choices and len(str_line.choices) > 0:
                 if (
                     str_line.choices[0].delta is not None
 
@@ -0,0 +1 @@
+
@@ -0,0 +1,262 @@
+from __future__ import annotations
+
+import json
+import time
+import httpx
+
+from typing import Iterator, Optional, AsyncIterator
+
+from litellm.llms.base_llm.chat.transformation import BaseConfig
+from litellm.types.llms.openai import OpenAIChatCompletionChunk
+from ...custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+
+
+# -------------------------------
+# Errors
+# -------------------------------
+class GenAIHubOrchestrationError(Exception):
+    """Exception carrying a status code alongside its error message.
+
+    Attributes:
+        status_code: Numeric status code supplied by the raiser.
+        message: Error text; also forwarded to ``Exception`` so that
+            ``str(err)`` yields the same text.
+    """
+
+    def __init__(self, status_code: int, message: str):
+        self.status_code = status_code
+        self.message = message
+        super().__init__(message)
+
+
+# -------------------------------
+# Stream parsing helpers
+# -------------------------------
+
+
+def _now_ts() -> int:
+    """Return the current Unix time truncated to whole seconds."""
+    current = time.time()
+    return int(current)
+
+
+def _is_terminal_chunk(chunk: OpenAIChatCompletionChunk) -> bool:
+    """Report whether any choice of ``chunk`` carries a finish reason.
+
+    The check is best-effort: any error raised while inspecting the chunk
+    (missing attributes, non-iterable ``choices``, ...) is treated as
+    "not terminal" instead of being propagated.
+    """
+    try:
+        choices = chunk.choices or []
+        return any(choice.finish_reason is not None for choice in choices)
+    except Exception:
+        return False
+
+
+class _StreamParser:
+    """Normalize orchestration streaming events into OpenAI-like chunks."""
+
+    @staticmethod
+    def _from_orchestration_result(evt: dict) -> Optional[OpenAIChatCompletionChunk]:
+        """Map an ``orchestration_result`` event to an OpenAI-like chunk.
+
+        Args:
+            evt: Decoded event; its ``orchestration_result`` entry holds the
+                chunk fields (``id``, ``object``, ``created``, ``model``,
+                ``choices``).
+
+        Returns:
+            The validated chunk, or ``None`` when ``orchestration_result`` is
+            missing or empty.
+        """
+        orc = evt.get("orchestration_result") or {}
+        if not orc:
+            return None
+
+        # Only index / delta / finish_reason are forwarded per choice; any
+        # other fields on the incoming choice are dropped.
+        choices = [
+            {
+                "index": c.get("index", 0),
+                "delta": c.get("delta") or {},
+                "finish_reason": c.get("finish_reason"),
+            }
+            for c in (orc.get("choices") or [])
+        ]
+
+        return OpenAIChatCompletionChunk.model_validate(
+            {
+                # Fall back to event-level fields, then to fixed placeholders.
+                "id": orc.get("id") or evt.get("request_id") or "stream-chunk",
+                "object": orc.get("object") or "chat.completion.chunk",
+                "created": orc.get("created") or evt.get("created") or _now_ts(),
+                "model": orc.get("model") or "unknown",
+                "choices": choices,
+            }
+        )
+
+    @staticmethod
+    def to_openai_chunk(event_obj: dict) -> Optional[OpenAIChatCompletionChunk]:
+        """Convert one decoded stream event into an OpenAI-like chunk.
+
+        Accepts:
+          - {"final_result": <openai-style CHUNK>}   (IMPORTANT: this is just another chunk, NOT terminal)
+          - {"orchestration_result": {...}}          (map to chunk)
+          - already-openai-shaped chunks
+          - other events, including non-object JSON values (ignored)
+
+        Returns:
+          The validated chunk, or ``None`` for events that carry no chunk.
+
+        Raises:
+          - ValueError for in-stream error objects (message is the event as JSON)
+        """
+        # A decoded JSON line may be a number, string or list rather than an
+        # object. Without this guard the membership tests below would raise
+        # TypeError on numbers and substring-match inside bare strings.
+        if not isinstance(event_obj, dict):
+            return None
+
+        # In-stream error per spec (surface as exception)
+        if "code" in event_obj or "error" in event_obj:
+            raise ValueError(json.dumps(event_obj))
+
+        # FINAL RESULT IS *NOT* TERMINAL: treat it as the next chunk
+        if "final_result" in event_obj:
+            # Shallow-copy so defaulting "object" does not mutate the caller's event.
+            fr = {**(event_obj["final_result"] or {})}
+            # ensure it looks like an OpenAI chunk
+            fr.setdefault("object", "chat.completion.chunk")
+            return OpenAIChatCompletionChunk.model_validate(fr)
+
+        # Orchestration incremental delta
+        if "orchestration_result" in event_obj:
+            return _StreamParser._from_orchestration_result(event_obj)
+
+        # Already an OpenAI-like chunk
+        if "choices" in event_obj and "object" in event_obj:
+            return OpenAIChatCompletionChunk.model_validate(event_obj)
+
+        # Unknown / heartbeat / metrics
+        return None
+
+
+# -------------------------------
+# Iterators
+# -------------------------------
+class SAPStreamIterator:
+    """
+    Sync iterator over the lines of a streaming response that yields
+    OpenAIChatCompletionChunk objects.
+
+    Accepts both SSE `data: ...` and raw JSON lines. Iteration stops after a
+    terminal chunk (one with a finish_reason) or the `[DONE]` sentinel. The
+    underlying stream is not closed here; the iterator only marks itself as
+    finished.
+
+    Args:
+        response: Iterable of text lines.
+        event_prefix: Prefix stripped from a line when present.
+        final_msg: Payload that marks the end of the stream.
+    """
+
+    def __init__(
+        self,
+        response: Iterator,
+        event_prefix: str = "data: ",
+        final_msg: str = "[DONE]",
+    ):
+        self._resp = response
+        # iter() is a no-op for real iterators, but guarantees that a plain
+        # iterable (e.g. a list) is consumed progressively instead of being
+        # restarted from its first line on every __next__ call.
+        self._iter = iter(response)
+        self._prefix = event_prefix
+        self._final = final_msg
+        self._done = False
+
+    def __iter__(self) -> Iterator[OpenAIChatCompletionChunk]:
+        return self
+
+    def __next__(self) -> OpenAIChatCompletionChunk:
+        """Return the next chunk, skipping blank, non-JSON and chunk-less lines.
+
+        Raises:
+            StopIteration: after `[DONE]`, after a terminal chunk has been
+                returned, or when the underlying lines are exhausted.
+            ValueError: when the stream carries an in-stream error object.
+        """
+        if self._done:
+            raise StopIteration
+
+        for raw in self._iter:
+            line = (raw or "").strip()
+            if not line:
+                continue
+
+            payload = (
+                line[len(self._prefix) :] if line.startswith(self._prefix) else line
+            )
+            if payload == self._final:
+                self._safe_close()
+                raise StopIteration
+
+            try:
+                obj = json.loads(payload)
+            except Exception:
+                # Deliberate best-effort: lines that are not JSON are skipped.
+                continue
+
+            try:
+                chunk = _StreamParser.to_openai_chunk(obj)
+            except ValueError:
+                self._safe_close()
+                raise
+
+            if chunk is None:
+                continue
+
+            # Mark finished on a terminal chunk; it is still returned, and the
+            # next __next__() call stops immediately.
+            if _is_terminal_chunk(chunk):
+                self._safe_close()
+
+            return chunk
+
+        self._safe_close()
+        raise StopIteration
+
+    def _safe_close(self) -> None:
+        """Mark the iterator as finished (idempotent); closes no resources."""
+        self._done = True
+
+
+class AsyncSAPStreamIterator:
+    """
+    Async iterator over the lines of a streaming response that yields
+    OpenAI-like chunks.
+
+    Accepts both SSE `data: ...` and raw JSON lines. Iteration stops after a
+    terminal chunk (one with a finish_reason), the `[DONE]` sentinel, or when
+    reading the next line fails. The underlying stream is not closed here; the
+    iterator only marks itself as finished.
+
+    Args:
+        response: Async iterator (or async iterable) of text lines.
+        event_prefix: Prefix stripped from a line when present.
+        final_msg: Payload that marks the end of the stream.
+    """
+
+    sync_stream = False
+
+    def __init__(
+        self,
+        response: AsyncIterator,
+        event_prefix: str = "data: ",
+        final_msg: str = "[DONE]",
+    ):
+        self._resp = response
+        self._prefix = event_prefix
+        self._final = final_msg
+        self._line_iter = None
+        self._done = False
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        """Return the next chunk, skipping blank, non-JSON and chunk-less lines.
+
+        Raises:
+            StopAsyncIteration: after `[DONE]`, after a terminal chunk has been
+                returned, when the lines are exhausted, or when reading fails
+                with httpx.ReadError / OSError.
+            GenAIHubOrchestrationError: (status 502) when the stream carries an
+                in-stream error object.
+        """
+        if self._done:
+            raise StopAsyncIteration
+
+        if self._line_iter is None:
+            # Obtain a real async iterator so that async *iterables* work too;
+            # for an async iterator __aiter__() simply returns the object itself.
+            make_iter = getattr(self._resp, "__aiter__", None)
+            self._line_iter = make_iter() if make_iter is not None else self._resp
+
+        while True:
+            try:
+                raw = await self._line_iter.__anext__()
+            except (StopAsyncIteration, httpx.ReadError, OSError):
+                # Read failures are treated like a normal end of stream.
+                await self._aclose()
+                raise StopAsyncIteration
+
+            line = (raw or "").strip()
+            if not line:
+                continue
+
+            payload = (
+                line[len(self._prefix) :] if line.startswith(self._prefix) else line
+            )
+            if payload == self._final:
+                await self._aclose()
+                raise StopAsyncIteration
+            try:
+                obj = json.loads(payload)
+            except Exception:
+                # Deliberate best-effort: lines that are not JSON are skipped.
+                continue
+
+            try:
+                chunk = _StreamParser.to_openai_chunk(obj)
+            except ValueError as e:
+                await self._aclose()
+                # Chain the cause so the original in-stream error stays visible.
+                raise GenAIHubOrchestrationError(502, str(e)) from e
+
+            if chunk is None:
+                continue
+
+            # If terminal, close BEFORE returning. Next __anext__() will stop immediately.
+            if _is_terminal_chunk(chunk):
+                await self._aclose()
+
+            return chunk
+
+    async def _aclose(self):
+        """Mark the iterator as finished (idempotent); closes no resources."""
+        self._done = True
+
+
+# -------------------------------
+# LLM handler
+# -------------------------------
+class GenAIHubOrchestration(BaseLLMHTTPHandler):
+    """LLM HTTP handler that customises how streaming is requested."""
+
+    def _add_stream_param_to_request_body(
+        self,
+        data: dict,
+        provider_config: BaseConfig,
+        fake_stream: bool,
+    ) -> dict:
+        """Enable streaming in the request body's ``config.stream`` section.
+
+        Sets ``data["config"]["stream"]["enabled"] = True``, creating the
+        ``config`` and ``stream`` sections when they are missing (or not
+        dicts) instead of failing with KeyError/AttributeError/TypeError.
+        Other keys already present in ``config["stream"]`` are preserved.
+
+        Args:
+            data: Request body; modified in place.
+            provider_config: Unused here.
+            fake_stream: Unused here.
+
+        Returns:
+            The same ``data`` dict.
+        """
+        config = data.get("config")
+        if not isinstance(config, dict):
+            config = {}
+            data["config"] = config
+
+        stream = config.get("stream")
+        if isinstance(stream, dict):
+            stream["enabled"] = True
+        else:
+            config["stream"] = {"enabled": True}
+        return data