Skip to content

Commit 50f4324

Browse files
committed
Add /api/generate endpoint for model loading and unloading
So we can load and unload models. Signed-off-by: Eric Curtin <[email protected]>
1 parent 454ece7 commit 50f4324

File tree

13 files changed

+299
-10
lines changed

13 files changed

+299
-10
lines changed

cmd/cli/commands/root.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ func NewRootCmd(cli *command.DockerCli) *cobra.Command {
113113
newConfigureCmd(),
114114
newPSCmd(),
115115
newDFCmd(),
116+
newStopCmd(),
116117
newUnloadCmd(),
117118
newRequestsCmd(),
118119
)

cmd/cli/commands/run.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -691,8 +691,12 @@ func newRunCmd() *cobra.Command {
691691
}
692692
}
693693

694+
// Check if a prompt was explicitly provided (even if empty string)
695+
// If args length > 1, then a prompt argument was provided (even if it's "")
696+
explicitPromptProvided := len(args) > 1
697+
694698
// Handle --detach flag: just load the model without interaction
695-
if detach {
699+
if detach || (explicitPromptProvided && prompt == "") {
696700
// Make a minimal request to load the model into memory
697701
err := desktopClient.Chat(model, "", nil, func(content string) {
698702
// Silently discard output in detach mode
@@ -714,6 +718,14 @@ func newRunCmd() *cobra.Command {
714718
return nil
715719
}
716720

721+
// Preload the model in the background to optimize for the first user interaction
722+
// This makes sure the model is loaded when the user types their first prompt
723+
go func() {
724+
_ = desktopClient.Chat(model, "", nil, func(content string) {
725+
// Silently preload the model - discard output
726+
}, false)
727+
}()
728+
717729
// Use enhanced readline-based interactive mode when terminal is available
718730
if term.IsTerminal(int(os.Stdin.Fd())) {
719731
return generateInteractiveWithReadline(cmd, desktopClient, model)

cmd/cli/commands/stop.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package commands
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/docker/model-runner/cmd/cli/commands/completion"
7+
"github.com/docker/model-runner/cmd/cli/desktop"
8+
"github.com/docker/model-runner/pkg/inference/models"
9+
"github.com/spf13/cobra"
10+
)
11+
12+
func newStopCmd() *cobra.Command {
13+
var backend string
14+
15+
const cmdArgs = "MODEL"
16+
c := &cobra.Command{
17+
Use: "stop " + cmdArgs,
18+
Short: "Stop a running model",
19+
RunE: func(cmd *cobra.Command, args []string) error {
20+
model := models.NormalizeModelName(args[0])
21+
unloadResp, err := desktopClient.Unload(desktop.UnloadRequest{Backend: backend, Models: []string{model}})
22+
if err != nil {
23+
err = handleClientError(err, "Failed to stop model")
24+
return handleNotRunningError(err)
25+
}
26+
unloaded := unloadResp.UnloadedRunners
27+
if unloaded == 0 {
28+
cmd.Println("No such model running.")
29+
} else {
30+
cmd.Printf("Stopped %d model(s).\n", unloaded)
31+
}
32+
return nil
33+
},
34+
ValidArgsFunction: completion.NoComplete,
35+
}
36+
c.Args = func(cmd *cobra.Command, args []string) error {
37+
if len(args) < 1 {
38+
return fmt.Errorf(
39+
"'docker model stop' requires MODEL.\\n\\n" +
40+
"Usage: docker model stop " + cmdArgs + "\\n\\n" +
41+
"See 'docker model stop --help' for more information.",
42+
)
43+
}
44+
return nil
45+
}
46+
c.Flags().StringVar(&backend, "backend", "", "Optional backend to target")
47+
return c
48+
}

cmd/cli/commands/utils.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ func handleClientError(err error, message string) error {
3939
return errors.Join(err, errors.New(message))
4040
}
4141

42+
// handleNotRunningError translates errors that indicate the target model
// was not running into a user-friendly message. It is currently a plain
// pass-through, kept as an extension point so callers already route their
// stop/unload errors through a single place.
func handleNotRunningError(err error) error {
	// Pass-through for now; specialize "model not running" cases here later.
	return err
}
49+
4250
// stripDefaultsFromModelName removes the default "ai/" prefix and ":latest" tag for display.
4351
// Examples:
4452
// - "ai/gemma3:latest" -> "gemma3"

cmd/cli/docs/reference/docker_model.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ cname:
2222
- docker model run
2323
- docker model start-runner
2424
- docker model status
25+
- docker model stop
2526
- docker model stop-runner
2627
- docker model tag
2728
- docker model uninstall-runner
@@ -44,6 +45,7 @@ clink:
4445
- docker_model_run.yaml
4546
- docker_model_start-runner.yaml
4647
- docker_model_status.yaml
48+
- docker_model_stop.yaml
4749
- docker_model_stop-runner.yaml
4850
- docker_model_tag.yaml
4951
- docker_model_uninstall-runner.yaml
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
command: docker model stop
2+
short: Stop a running model
3+
long: Stop a running model
4+
usage: docker model stop MODEL
5+
pname: docker model
6+
plink: docker_model.yaml
7+
options:
8+
- option: backend
9+
value_type: string
10+
description: Optional backend to target
11+
deprecated: false
12+
hidden: false
13+
experimental: false
14+
experimentalcli: false
15+
kubernetes: false
16+
swarm: false
17+
deprecated: false
18+
hidden: false
19+
experimental: false
20+
experimentalcli: false
21+
kubernetes: false
22+
swarm: false
23+

cmd/cli/docs/reference/model.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Docker Model Runner
2323
| [`run`](model_run.md) | Run a model and interact with it using a submitted prompt or chat mode |
2424
| [`start-runner`](model_start-runner.md) | Start Docker Model Runner (Docker Engine only) |
2525
| [`status`](model_status.md) | Check if the Docker Model Runner is running |
26+
| [`stop`](model_stop.md) | Stop a running model |
2627
| [`stop-runner`](model_stop-runner.md) | Stop Docker Model Runner (Docker Engine only) |
2728
| [`tag`](model_tag.md) | Tag a model |
2829
| [`uninstall-runner`](model_uninstall-runner.md) | Uninstall Docker Model Runner (Docker Engine only) |
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# docker model stop
2+
3+
<!---MARKER_GEN_START-->
4+
Stop a running model
5+
6+
### Options
7+
8+
| Name | Type | Default | Description |
9+
|:------------|:---------|:--------|:---------------------------|
10+
| `--backend` | `string` | | Optional backend to target |
11+
12+
13+
<!---MARKER_GEN_END-->
14+

main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,17 @@ func main() {
179179
// Add /v1 as an alias for /engines/v1
180180
router.Handle("/v1/", &V1AliasHandler{scheduler: scheduler})
181181

182+
// Add API endpoints by creating a custom handler
183+
apiHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
184+
switch r.URL.Path {
185+
case "/api/generate":
186+
scheduler.HandleGenerate(w, r)
187+
default:
188+
http.NotFound(w, r)
189+
}
190+
})
191+
router.Handle("/api/generate", apiHandler)
192+
182193
// Add metrics endpoint if enabled
183194
if os.Getenv("DISABLE_METRICS") != "1" {
184195
metricsHandler := metrics.NewAggregatedMetricsHandler(

pkg/inference/scheduling/api.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,33 @@ type ConfigureRequest struct {
9494
RawRuntimeFlags string `json:"raw-runtime-flags,omitempty"`
9595
Speculative *inference.SpeculativeDecodingConfig `json:"speculative,omitempty"`
9696
}
97+
98+
// GenerateRequest represents the request structure for the /api/generate
// endpoint. The field set mirrors the Ollama generate API — NOTE(review):
// per-field semantics below are presumed from that API; confirm against the
// scheduler's HandleGenerate implementation.
type GenerateRequest struct {
	// Model is the model to load/run (required).
	Model string `json:"model"`
	// Prompt is the text to complete; per the commit intent, an empty
	// prompt is used for load/unload requests.
	Prompt string `json:"prompt"`
	// System presumably overrides the model's system message — verify.
	System string `json:"system,omitempty"`
	// Template presumably overrides the prompt template — verify.
	Template string `json:"template,omitempty"`
	// Context carries opaque conversation state from a prior response.
	Context []int `json:"context,omitempty"`
	// Stream is a pointer so an explicit false can be distinguished from unset.
	Stream *bool `json:"stream,omitempty"`
	// Raw, when true, presumably bypasses template formatting — verify.
	Raw bool `json:"raw,omitempty"`
	// KeepAlive is a pointer so 0 (unload now) differs from unset.
	KeepAlive *int `json:"keep_alive,omitempty"`
	// Options carries model-specific runtime parameters.
	Options map[string]any `json:"options,omitempty"`
}
110+
111+
// GenerateResponse represents the response structure for the /api/generate
// endpoint. Field names and JSON tags mirror the Ollama generate API —
// NOTE(review): duration fields look like nanosecond counts per that API;
// confirm against the scheduler's HandleGenerate implementation.
type GenerateResponse struct {
	Model      string    `json:"model"`                 // model that produced this response
	CreatedAt  time.Time `json:"created_at"`            // timestamp of the response
	Response   string    `json:"response"`              // generated text
	Done       bool      `json:"done"`                  // true once generation has finished
	DoneReason string    `json:"done_reason,omitempty"` // why generation stopped
	Context    []int     `json:"context,omitempty"`     // opaque state for follow-up requests

	// Timing and token-count metadata, omitted when zero.
	TotalDuration      int64 `json:"total_duration,omitempty"`
	LoadDuration       int64 `json:"load_duration,omitempty"`
	PromptEvalCount    int   `json:"prompt_eval_count,omitempty"`
	PromptEvalDuration int64 `json:"prompt_eval_duration,omitempty"`
	EvalCount          int   `json:"eval_count,omitempty"`
	EvalDuration       int64 `json:"eval_duration,omitempty"`
}
126+

0 commit comments

Comments
 (0)