import { type Api, completeSimple, type Model } from "@mariozechner/pi-ai"; import { Type } from "@sinclair/typebox"; import { describe, expect, it } from "vitest"; import { loadConfig } from "../config/config.js"; import { resolveOpenClawAgentDir } from "./agent-paths.js"; import { collectAnthropicApiKeys, isAnthropicBillingError, isAnthropicRateLimitError, } from "./live-auth-keys.js"; import { isHighSignalLiveModelRef } from "./live-model-filter.js"; import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js"; import { getApiKeyForModel, requireApiKey } from "./model-auth.js"; import { shouldSuppressBuiltInModel } from "./model-suppression.js"; import { ensureOpenClawModelsJson } from "./models-config.js"; import { isRateLimitErrorMessage } from "./pi-embedded-helpers/errors.js"; import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js"; const LIVE = isLiveTestEnabled(); const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim()); const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled(); const LIVE_HEARTBEAT_MS = Math.max(1_000, toInt(process.env.OPENCLAW_LIVE_HEARTBEAT_MS, 30_000)); const LIVE_SETUP_TIMEOUT_MS = Math.max( 1_000, toInt(process.env.OPENCLAW_LIVE_SETUP_TIMEOUT_MS, 45_000), ); const describeLive = LIVE ? describe : describe.skip; function parseCsvFilter(raw?: string): Set | null { const trimmed = raw?.trim(); if (!trimmed || trimmed === "all") { return null; } const ids = trimmed .split(",") .map((s) => s.trim()) .filter(Boolean); return ids.length ? new Set(ids) : null; } function parseProviderFilter(raw?: string): Set | null { return parseCsvFilter(raw); } function parseModelFilter(raw?: string): Set | null { return parseCsvFilter(raw); } function logProgress(message: string): void { process.stderr.write(`[live] ${message}\n`); } function formatElapsedSeconds(ms: number): string { return `${Math.max(1, Math.round(ms / 1_000))}s`; } async function withLiveHeartbeat(operation: Promise, context: string): Promise { const startedAt = Date.now(); let heartbeatCount = 0; const timer = setInterval(() => { heartbeatCount += 1; logProgress(`${context}: still running (${formatElapsedSeconds(Date.now() - startedAt)})`); }, LIVE_HEARTBEAT_MS); timer.unref?.(); try { return await operation; } finally { clearInterval(timer); if (heartbeatCount > 0) { logProgress(`${context}: completed after ${formatElapsedSeconds(Date.now() - startedAt)}`); } } } async function withLiveStageTimeout( operation: Promise, context: string, timeoutMs = LIVE_SETUP_TIMEOUT_MS, ): Promise { let hardTimer: ReturnType | undefined; try { return await withLiveHeartbeat( Promise.race([ operation, new Promise((_, reject) => { hardTimer = setTimeout(() => { reject(new Error(`${context} timed out after ${timeoutMs}ms`)); }, timeoutMs); hardTimer.unref?.(); }), ]), context, ); } finally { if (hardTimer) { clearTimeout(hardTimer); } } } function formatFailurePreview( failures: Array<{ model: string; error: string }>, maxItems: number, ): string { const limit = Math.max(1, maxItems); const lines = failures.slice(0, limit).map((failure, index) => { const normalized = failure.error.replace(/\s+/g, " ").trim(); const clipped = normalized.length > 320 ? `${normalized.slice(0, 317)}...` : normalized; return `${index + 1}. ${failure.model}: ${clipped}`; }); const remaining = failures.length - limit; if (remaining > 0) { lines.push(`... and ${remaining} more`); } return lines.join("\n"); } function isGoogleModelNotFoundError(err: unknown): boolean { const msg = String(err); if (!/not found/i.test(msg)) { return false; } if (/\b404\b/.test(msg)) { return true; } if (/models\/.+ is not found for api version/i.test(msg)) { return true; } if (/"status"\\s*:\\s*"NOT_FOUND"/.test(msg)) { return true; } if (/"code"\\s*:\\s*404/.test(msg)) { return true; } return false; } function isModelNotFoundErrorMessage(raw: string): boolean { const msg = raw.trim(); if (!msg) { return false; } if (/\b404\b/.test(msg) && /not(?:[\s_-]+)?found/i.test(msg)) { return true; } if (/not_found_error/i.test(msg)) { return true; } if (/model:\s*[a-z0-9._-]+/i.test(msg) && /not(?:[\s_-]+)?found/i.test(msg)) { return true; } return false; } describe("isModelNotFoundErrorMessage", () => { it("matches whitespace-separated not found errors", () => { expect(isModelNotFoundErrorMessage("404 model not found")).toBe(true); expect(isModelNotFoundErrorMessage("model: minimax-text-01 not found")).toBe(true); }); it("still matches underscore and hyphen variants", () => { expect(isModelNotFoundErrorMessage("404 model not_found")).toBe(true); expect(isModelNotFoundErrorMessage("404 model not-found")).toBe(true); }); }); function isChatGPTUsageLimitErrorMessage(raw: string): boolean { const msg = raw.toLowerCase(); return msg.includes("hit your chatgpt usage limit") && msg.includes("try again in"); } function isRefreshTokenReused(raw: string): boolean { return /refresh_token_reused/i.test(raw); } function isInstructionsRequiredError(raw: string): boolean { return /instructions are required/i.test(raw); } function isModelTimeoutError(raw: string): boolean { return /model call timed out after \d+ms/i.test(raw); } function isProviderUnavailableErrorMessage(raw: string): boolean { const msg = raw.toLowerCase(); return ( msg.includes("no allowed providers are available") || msg.includes("provider unavailable") || msg.includes("upstream provider unavailable") || msg.includes("upstream error from google") ); } function toInt(value: string | undefined, fallback: number): number { const trimmed = value?.trim(); if (!trimmed) { return fallback; } const parsed = Number.parseInt(trimmed, 10); return Number.isFinite(parsed) ? parsed : fallback; } function capByProviderSpread( items: T[], maxItems: number, providerOf: (item: T) => string, ): T[] { if (maxItems <= 0 || items.length <= maxItems) { return items; } const providerOrder: string[] = []; const grouped = new Map(); for (const item of items) { const provider = providerOf(item); const bucket = grouped.get(provider); if (bucket) { bucket.push(item); continue; } providerOrder.push(provider); grouped.set(provider, [item]); } const selected: T[] = []; while (selected.length < maxItems && grouped.size > 0) { for (const provider of providerOrder) { const bucket = grouped.get(provider); if (!bucket || bucket.length === 0) { continue; } const item = bucket.shift(); if (item) { selected.push(item); } if (bucket.length === 0) { grouped.delete(provider); } if (selected.length >= maxItems) { break; } } } return selected; } function resolveTestReasoning( model: Model, ): "minimal" | "low" | "medium" | "high" | "xhigh" | undefined { if (!model.reasoning) { return undefined; } const id = model.id.toLowerCase(); if (model.provider === "openai" || model.provider === "openai-codex") { if (id.includes("pro")) { return "high"; } return "medium"; } return "low"; } function resolveLiveSystemPrompt(model: Model): string | undefined { if (model.provider === "openai-codex") { return "You are a concise assistant. Follow the user's instruction exactly."; } return undefined; } describe("resolveLiveSystemPrompt", () => { it("adds instructions for openai-codex probes", () => { expect( resolveLiveSystemPrompt({ provider: "openai-codex", } as Model), ).toContain("Follow the user's instruction exactly."); }); it("keeps other providers unchanged", () => { expect( resolveLiveSystemPrompt({ provider: "openai", } as Model), ).toBeUndefined(); }); }); async function completeSimpleWithTimeout( model: Model, context: Parameters>[1], options: Parameters>[2], timeoutMs: number, progressContext: string, ) { const maxTimeoutMs = Math.max(1, timeoutMs); const controller = new AbortController(); const abortTimer = setTimeout(() => { controller.abort(); }, maxTimeoutMs); abortTimer.unref?.(); let hardTimer: ReturnType | undefined; const timeout = new Promise((_, reject) => { hardTimer = setTimeout(() => { reject(new Error(`model call timed out after ${maxTimeoutMs}ms`)); }, maxTimeoutMs); hardTimer.unref?.(); }); try { return await withLiveHeartbeat( Promise.race([ completeSimple(model, context, { ...options, signal: controller.signal, }), timeout, ]), progressContext, ); } finally { clearTimeout(abortTimer); if (hardTimer) { clearTimeout(hardTimer); } } } async function completeOkWithRetry(params: { model: Model; apiKey: string; timeoutMs: number; progressLabel: string; }) { const runOnce = async (maxTokens: number) => { const res = await completeSimpleWithTimeout( params.model, { systemPrompt: resolveLiveSystemPrompt(params.model), messages: [ { role: "user", content: "Reply with the word ok.", timestamp: Date.now(), }, ], }, { apiKey: params.apiKey, reasoning: resolveTestReasoning(params.model), maxTokens, }, params.timeoutMs, `${params.progressLabel}: prompt call (maxTokens=${maxTokens})`, ); const text = res.content .filter((block) => block.type === "text") .map((block) => block.text.trim()) .join(" "); return { res, text }; }; const first = await runOnce(64); if (first.text.length > 0) { return first; } // Some providers (for example Moonshot Kimi and MiniMax M2.5) may emit // reasoning blocks first and only return text once token budget is higher. return await runOnce(256); } describeLive("live models (profile keys)", () => { it( "completes across selected models", async () => { logProgress("[live-models] loading config"); const cfg = await withLiveStageTimeout( Promise.resolve().then(() => loadConfig()), "[live-models] load config", ); logProgress("[live-models] preparing models.json"); await withLiveStageTimeout( ensureOpenClawModelsJson(cfg), "[live-models] prepare models.json", ); if (!DIRECT_ENABLED) { logProgress( "[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|all|; all=modern)", ); return; } const anthropicKeys = collectAnthropicApiKeys(); if (anthropicKeys.length > 0) { process.env.ANTHROPIC_API_KEY = anthropicKeys[0]; logProgress(`[live-models] anthropic keys loaded: ${anthropicKeys.length}`); } const agentDir = resolveOpenClawAgentDir(); const authStorage = discoverAuthStorage(agentDir); logProgress("[live-models] loading model registry"); const models = await withLiveStageTimeout( Promise.resolve().then(() => discoverModels(authStorage, agentDir).getAll()), "[live-models] load model registry", ); const rawModels = process.env.OPENCLAW_LIVE_MODELS?.trim(); const useModern = rawModels === "modern" || rawModels === "all"; const useExplicit = Boolean(rawModels) && !useModern; const filter = useExplicit ? parseModelFilter(rawModels) : null; const allowNotFoundSkip = useModern; const providers = parseProviderFilter(process.env.OPENCLAW_LIVE_PROVIDERS); const perModelTimeoutMs = toInt(process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS, 30_000); const maxModels = toInt(process.env.OPENCLAW_LIVE_MAX_MODELS, 0); const failures: Array<{ model: string; error: string }> = []; const skipped: Array<{ model: string; reason: string }> = []; const candidates: Array<{ model: Model; apiKeyInfo: Awaited>; }> = []; for (const model of models) { if (shouldSuppressBuiltInModel({ provider: model.provider, id: model.id })) { continue; } if (providers && !providers.has(model.provider)) { continue; } const id = `${model.provider}/${model.id}`; if (filter && !filter.has(id)) { continue; } if (!filter && useModern) { if (!isHighSignalLiveModelRef({ provider: model.provider, id: model.id })) { continue; } } try { const apiKeyInfo = await getApiKeyForModel({ model, cfg }); if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) { skipped.push({ model: id, reason: `non-profile credential source: ${apiKeyInfo.source}`, }); continue; } candidates.push({ model, apiKeyInfo }); } catch (err) { skipped.push({ model: id, reason: String(err) }); } } if (candidates.length === 0) { logProgress("[live-models] no API keys found; skipping"); return; } const selectedCandidates = capByProviderSpread( candidates, maxModels > 0 ? maxModels : candidates.length, (entry) => entry.model.provider, ); logProgress(`[live-models] selection=${useExplicit ? "explicit" : "high-signal"}`); if (selectedCandidates.length < candidates.length) { logProgress( `[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`, ); } logProgress(`[live-models] running ${selectedCandidates.length} models`); logProgress( `[live-models] heartbeat=${formatElapsedSeconds(LIVE_HEARTBEAT_MS)} timeout=${formatElapsedSeconds(perModelTimeoutMs)}`, ); const total = selectedCandidates.length; for (const [index, entry] of selectedCandidates.entries()) { const { model, apiKeyInfo } = entry; const id = `${model.provider}/${model.id}`; const progressLabel = `[live-models] ${index + 1}/${total} ${id}`; const attemptMax = model.provider === "anthropic" && anthropicKeys.length > 0 ? anthropicKeys.length : 1; for (let attempt = 0; attempt < attemptMax; attempt += 1) { if (model.provider === "anthropic" && anthropicKeys.length > 0) { process.env.ANTHROPIC_API_KEY = anthropicKeys[attempt]; } const apiKey = model.provider === "anthropic" && anthropicKeys.length > 0 ? anthropicKeys[attempt] : requireApiKey(apiKeyInfo, model.provider); try { // Special regression: OpenAI requires replayed `reasoning` items for tool-only turns. if ( model.provider === "openai" && model.api === "openai-responses" && model.id === "gpt-5.2" ) { logProgress(`${progressLabel}: tool-only regression`); const noopTool = { name: "noop", description: "Return ok.", parameters: Type.Object({}, { additionalProperties: false }), }; let firstUserContent = "Call the tool `noop` with {}. Do not write any other text."; let firstUser = { role: "user" as const, content: firstUserContent, timestamp: Date.now(), }; let first = await completeSimpleWithTimeout( model, { messages: [firstUser], tools: [noopTool] }, { apiKey, reasoning: resolveTestReasoning(model), maxTokens: 128, }, perModelTimeoutMs, `${progressLabel}: tool-only regression first call`, ); let toolCall = first.content.find((b) => b.type === "toolCall"); let firstText = first.content .filter((b) => b.type === "text") .map((b) => b.text.trim()) .join(" ") .trim(); // Occasional flake: model answers in text instead of tool call (or adds text). // Retry a couple times with a stronger instruction so we still exercise the tool-only replay path. for (let i = 0; i < 2 && (!toolCall || firstText.length > 0); i += 1) { firstUserContent = "Call the tool `noop` with {}. IMPORTANT: respond ONLY with the tool call; no other text."; firstUser = { role: "user" as const, content: firstUserContent, timestamp: Date.now(), }; first = await completeSimpleWithTimeout( model, { messages: [firstUser], tools: [noopTool] }, { apiKey, reasoning: resolveTestReasoning(model), maxTokens: 128, }, perModelTimeoutMs, `${progressLabel}: tool-only regression retry ${i + 1}`, ); toolCall = first.content.find((b) => b.type === "toolCall"); firstText = first.content .filter((b) => b.type === "text") .map((b) => b.text.trim()) .join(" ") .trim(); } expect(toolCall).toBeTruthy(); expect(firstText.length).toBe(0); if (!toolCall || toolCall.type !== "toolCall") { throw new Error("expected tool call"); } const second = await completeSimpleWithTimeout( model, { messages: [ firstUser, first, { role: "toolResult", toolCallId: toolCall.id, toolName: "noop", content: [{ type: "text", text: "ok" }], isError: false, timestamp: Date.now(), }, { role: "user", content: "Reply with the word ok.", timestamp: Date.now(), }, ], }, { apiKey, reasoning: resolveTestReasoning(model), // Headroom: reasoning summary can consume most of the output budget. maxTokens: 256, }, perModelTimeoutMs, `${progressLabel}: tool-only regression followup`, ); const secondText = second.content .filter((b) => b.type === "text") .map((b) => b.text.trim()) .join(" "); expect(secondText.length).toBeGreaterThan(0); logProgress(`${progressLabel}: done`); break; } logProgress(`${progressLabel}: prompt`); const ok = await completeOkWithRetry({ model, apiKey, timeoutMs: perModelTimeoutMs, progressLabel, }); if (ok.res.stopReason === "error") { const msg = ok.res.errorMessage ?? ""; if (allowNotFoundSkip && isModelNotFoundErrorMessage(msg)) { skipped.push({ model: id, reason: msg }); logProgress(`${progressLabel}: skip (model not found)`); break; } throw new Error(msg || "model returned error with no message"); } if ( ok.text.length === 0 && (model.provider === "google" || model.provider === "google-gemini-cli") ) { skipped.push({ model: id, reason: "no text returned (likely unavailable model id)", }); logProgress(`${progressLabel}: skip (google model not found)`); break; } if ( ok.text.length === 0 && (model.provider === "openrouter" || model.provider === "opencode" || model.provider === "opencode-go") ) { skipped.push({ model: id, reason: "no text returned (provider returned empty content)", }); logProgress(`${progressLabel}: skip (empty response)`); break; } if ( ok.text.length === 0 && allowNotFoundSkip && (model.provider === "minimax" || model.provider === "zai") ) { skipped.push({ model: id, reason: "no text returned (provider returned empty content)", }); logProgress(`${progressLabel}: skip (empty response)`); break; } if ( ok.text.length === 0 && allowNotFoundSkip && (model.provider === "google-antigravity" || model.provider === "openai-codex") ) { skipped.push({ model: id, reason: "no text returned (provider returned empty content)", }); logProgress(`${progressLabel}: skip (empty response)`); break; } expect(ok.text.length).toBeGreaterThan(0); logProgress(`${progressLabel}: done`); break; } catch (err) { const message = String(err); if ( model.provider === "anthropic" && isAnthropicRateLimitError(message) && attempt + 1 < attemptMax ) { logProgress(`${progressLabel}: rate limit, retrying with next key`); continue; } if (model.provider === "anthropic" && isAnthropicRateLimitError(message)) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (anthropic rate limit)`); break; } if (model.provider === "anthropic" && isAnthropicBillingError(message)) { if (attempt + 1 < attemptMax) { logProgress(`${progressLabel}: billing issue, retrying with next key`); continue; } skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (anthropic billing)`); break; } if ( (model.provider === "google" || model.provider === "google-gemini-cli") && isGoogleModelNotFoundError(err) ) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (google model not found)`); break; } if ( allowNotFoundSkip && model.provider === "minimax" && message.includes("request ended without sending any chunks") ) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (minimax empty response)`); break; } if ( allowNotFoundSkip && (model.provider === "minimax" || model.provider === "zai") && isRateLimitErrorMessage(message) ) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (rate limit)`); break; } if ( allowNotFoundSkip && (model.provider === "opencode" || model.provider === "opencode-go") && isRateLimitErrorMessage(message) ) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (rate limit)`); break; } if ( allowNotFoundSkip && model.provider === "openai-codex" && isRefreshTokenReused(message) ) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (codex refresh token reused)`); break; } if ( allowNotFoundSkip && model.provider === "openai-codex" && isChatGPTUsageLimitErrorMessage(message) ) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (chatgpt usage limit)`); break; } if ( allowNotFoundSkip && model.provider === "openai-codex" && isInstructionsRequiredError(message) ) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (instructions required)`); break; } if (allowNotFoundSkip && isModelTimeoutError(message)) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (timeout)`); break; } if (allowNotFoundSkip && isProviderUnavailableErrorMessage(message)) { skipped.push({ model: id, reason: message }); logProgress(`${progressLabel}: skip (provider unavailable)`); break; } logProgress(`${progressLabel}: failed`); failures.push({ model: id, error: message }); break; } } } if (failures.length > 0) { const preview = formatFailurePreview(failures, 20); throw new Error( `live model failures (${failures.length}, showing ${Math.min(failures.length, 20)}):\n${preview}`, ); } void skipped; }, 15 * 60 * 1000, ); });