|
| 1 | +#!/usr/bin/env bun |
| 2 | + |
| 3 | +import { z } from "zod"; |
| 4 | +import path from "node:path"; |
| 5 | +import { mkdir, rm, readdir, stat } from "node:fs/promises"; |
| 6 | + |
// Helicone public model registry endpoint.
// This is the URL the script fetches; the JSON it returns is validated
// against the HeliconeResponse schema before any files are written.
const DEFAULT_ENDPOINT =
  "https://jawn.helicone.ai/v1/public/model-registry/models";
| 10 | + |
// Zod schemas to validate the Helicone response.

// Pricing figures attached to a single endpoint. Every field is optional,
// and `.passthrough()` keeps keys that are not listed here.
// NOTE(review): the unit of these numbers (per token vs. per million tokens)
// is not visible in this file — confirm against the Helicone API.
const Pricing = z
  .object({
    prompt: z.number().optional(),
    completion: z.number().optional(),
    cacheRead: z.number().optional(),
    cacheWrite: z.number().optional(),
    reasoning: z.number().optional(),
  })
  .passthrough();
| 21 | + |
// One endpoint through which a model is offered. Only `provider` is
// required; pricing may be missing entirely. Unlisted keys are kept.
const Endpoint = z
  .object({
    provider: z.string(),
    providerSlug: z.string().optional(),
    supportsPtb: z.boolean().optional(),
    pricing: Pricing.optional(),
  })
  .passthrough();
| 30 | + |
// A single model entry from the registry. Only `id` and `name` are
// required; everything else is optional and gets a fallback when the TOML
// file is generated. Unlisted keys are kept.
const ModelItem = z
  .object({
    id: z.string(),
    name: z.string(),
    author: z.string().optional(),
    contextLength: z.number().optional(),
    maxOutput: z.number().optional(),
    // Kept as a plain string; the generator slices date prefixes out of it.
    trainingDate: z.string().optional(),
    description: z.string().optional(),
    inputModalities: z.array(z.string()).optional(),
    outputModalities: z.array(z.string()).optional(),
    supportedParameters: z.array(z.string()).optional(),
    endpoints: z.array(Endpoint).optional(),
  })
  .passthrough();
| 46 | + |
// Top-level response envelope: `{ data: { models, total?, filters? } }`.
// `models` is the only required field. `filters` is accepted without any
// validation because this script never reads it.
const HeliconeResponse = z
  .object({
    data: z.object({
      models: z.array(ModelItem),
      total: z.number().optional(),
      filters: z.any().optional(),
    }),
  })
  .passthrough();
| 56 | + |
/**
 * Chooses the endpoint that represents a model.
 *
 * Preference order: the endpoint whose `provider` equals the model's
 * `author`, otherwise the first endpoint in the list.
 *
 * @param m - A validated registry model.
 * @returns The chosen endpoint, or `undefined` when the model lists none.
 */
function pickEndpoint(
  m: z.infer<typeof ModelItem>,
): z.infer<typeof Endpoint> | undefined {
  const candidates = m.endpoints ?? [];
  // Only look for an author match when an author is actually present.
  const byAuthor = m.author
    ? candidates.find((candidate) => candidate.provider === m.author)
    : undefined;
  // An empty list yields `undefined` here, same as having no list at all.
  return byAuthor ?? candidates[0];
}
| 66 | + |
/**
 * Reports whether any of `keys` occurs in `params`, ignoring letter case.
 *
 * @param params - Parameter names to search; a missing list means "none".
 * @param keys - Names to look for.
 * @returns `true` when at least one key matches a parameter name.
 */
function boolFromParams(params: string[] | undefined, keys: string[]): boolean {
  const wanted = keys.map((key) => key.toLowerCase());
  return params
    ? params.some((param) => wanted.includes(param.toLowerCase()))
    : false;
}
| 72 | + |
/**
 * Normalizes a modality list to the set this generator knows how to emit.
 *
 * Values are lower-cased, anything outside the allowed set is dropped, and
 * repeated entries (including case variants such as "Text" and "text") are
 * collapsed to one, keeping first-seen order.
 *
 * @param values - Raw modality names; may be missing.
 * @returns The cleaned list, or `["text"]` when nothing usable remains.
 */
function sanitizeModalities(values: string[] | undefined): string[] {
  const allowed = new Set(["text", "audio", "image", "video", "pdf"]);
  // A Set keeps insertion order, so it de-duplicates without reordering.
  const kept = new Set<string>();
  for (const value of values ?? []) {
    const normalized = value.toLowerCase();
    if (allowed.has(normalized)) kept.add(normalized);
  }
  return kept.size > 0 ? [...kept] : ["text"]; // default to text
}
| 79 | + |
/**
 * Encodes a value as a TOML basic (double-quoted) string.
 *
 * Backslashes and double quotes are backslash-escaped; control characters
 * are written as `\uXXXX` so the result always stays on one line.
 */
function tomlString(value: string): string {
  const escaped = value.replace(/[\\"\u0000-\u001f\u007f]/g, (ch) => {
    if (ch === "\\" || ch === '"') return `\\${ch}`;
    return `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`;
  });
  return `"${escaped}"`;
}

/**
 * Renders one registry model as the contents of a TOML file.
 *
 * Layout: top-level metadata keys, an optional `[cost]` table, then
 * `[limit]` and `[modalities]`. The `[cost]` table is written only when at
 * least one price line will actually appear in it.
 *
 * @param model - A validated registry model.
 * @returns The TOML document, terminated by a newline.
 */
function formatToml(model: z.infer<typeof ModelItem>) {
  const pricing = pickEndpoint(model)?.pricing;
  const supported = model.supportedParameters ?? [];

  // The registry exposes only a training date, so it stands in for both
  // release_date and last_updated. `||` (not `??`) is deliberate: an empty
  // string falls back to today's date as well.
  const today = new Date().toISOString().slice(0, 10);
  const releaseDate = (model.trainingDate || today).slice(0, 10);
  const lastUpdated = releaseDate;
  // YYYY-MM prefix of the training date; omitted when there is none.
  const knowledge = model.trainingDate?.slice(0, 7);

  const attachment = false; // Not exposed by Helicone registry
  const temperature = boolFromParams(supported, ["temperature"]);
  const toolCall = boolFromParams(supported, ["tools", "tool_choice"]);
  const reasoning = boolFromParams(supported, [
    "reasoning",
    "include_reasoning",
  ]);

  const inputMods = sanitizeModalities(model.inputModalities);
  const outputMods = sanitizeModalities(model.outputModalities);

  const lines: string[] = [];
  lines.push(`name = ${tomlString(model.name)}`);
  lines.push(`release_date = ${tomlString(releaseDate)}`);
  lines.push(`last_updated = ${tomlString(lastUpdated)}`);
  lines.push(`attachment = ${attachment}`);
  lines.push(`reasoning = ${reasoning}`);
  lines.push(`temperature = ${temperature}`);
  lines.push(`tool_call = ${toolCall}`);
  if (knowledge) lines.push(`knowledge = ${tomlString(knowledge)}`);
  lines.push(`open_weights = false`);
  lines.push("");

  // Collect the price lines first and emit the table only if any exist;
  // this avoids writing a bare `[cost]` header with nothing under it.
  const costLines: string[] = [];
  if (pricing) {
    if (pricing.prompt !== undefined)
      costLines.push(`input = ${pricing.prompt}`);
    if (pricing.completion !== undefined)
      costLines.push(`output = ${pricing.completion}`);
    // A reasoning price is only meaningful for reasoning-capable models.
    if (reasoning && pricing.reasoning !== undefined)
      costLines.push(`reasoning = ${pricing.reasoning}`);
    if (pricing.cacheRead !== undefined)
      costLines.push(`cache_read = ${pricing.cacheRead}`);
    if (pricing.cacheWrite !== undefined)
      costLines.push(`cache_write = ${pricing.cacheWrite}`);
  }
  if (costLines.length > 0) {
    lines.push(`[cost]`, ...costLines, "");
  }

  const context = model.contextLength ?? 0;
  const output = model.maxOutput ?? 4096;
  lines.push(`[limit]`);
  lines.push(`context = ${context}`);
  lines.push(`output = ${output}`);
  lines.push("");

  const quoteList = (mods: string[]) =>
    mods.map((m) => tomlString(m)).join(", ");
  lines.push(`[modalities]`);
  lines.push(`input = [${quoteList(inputMods)}]`);
  lines.push(`output = [${quoteList(outputMods)}]`);

  return lines.join("\n") + "\n";
}
| 151 | + |
/**
 * Regenerates `providers/helicone/models/*.toml` from the Helicone registry.
 *
 * Steps: fetch the registry, validate it, clear the previous output
 * (sub-directories and `.toml` files only), then write one file per model.
 * The output directory is not touched until the response has been fetched
 * and validated, so a failed download leaves the existing files in place.
 *
 * Exits the process with status 1 when the HTTP response is not OK or the
 * payload does not match the expected schema.
 */
async function main() {
  const endpoint = DEFAULT_ENDPOINT;

  const outDir = path.join(
    import.meta.dirname,
    "..",
    "..",
    "..",
    "providers",
    "helicone",
    "models",
  );

  const res = await fetch(endpoint);
  if (!res.ok) {
    console.error(`Failed to fetch registry: ${res.status} ${res.statusText}`);
    process.exit(1);
  }
  const json: unknown = await res.json();

  const parsed = HeliconeResponse.safeParse(json);
  if (!parsed.success) {
    // `issues` is the canonical list of validation problems on a ZodError.
    console.error("Invalid Helicone response:", parsed.error.issues);
    // Log the raw payload directly rather than attaching it to the error.
    console.error("When parsing:", json);
    process.exit(1);
  }

  const models = parsed.data.data.models;

  // Clean output directory: remove subfolders and existing TOML files.
  // Other regular files in the directory are left alone.
  await mkdir(outDir, { recursive: true });
  for (const entry of await readdir(outDir)) {
    const p = path.join(outDir, entry);
    const st = await stat(p);
    if (st.isDirectory()) {
      await rm(p, { recursive: true, force: true });
    } else if (st.isFile() && entry.endsWith(".toml")) {
      await rm(p, { force: true });
    }
  }
  let created = 0;

  for (const m of models) {
    // Model ids may contain "/", which would otherwise create sub-directories.
    const fileSafeId = m.id.replaceAll("/", "-");
    const filePath = path.join(outDir, `${fileSafeId}.toml`);
    const toml = formatToml(m);
    await Bun.write(filePath, toml);
    created++;
  }

  console.log(
    `Generated ${created} model file(s) under providers/helicone/models/*.toml`,
  );
}
| 207 | + |
// Entry point: run the generator as soon as the script is executed.
await main();
0 commit comments