diff --git a/home-manager/progs/pi.nix b/home-manager/progs/pi.nix index c209510..334dff4 100644 --- a/home-manager/progs/pi.nix +++ b/home-manager/progs/pi.nix @@ -37,7 +37,9 @@ let in { home.packages = [ - inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.omp + (inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.omp.overrideAttrs (old: { + patches = (old.patches or [ ]) ++ [ ../../patches/omp-fix-auth.patch ]; + })) ]; # main settings: ~/.omp/agent/config.yml (JSON is valid YAML) diff --git a/patches/omp-fix-auth.patch b/patches/omp-fix-auth.patch new file mode 100644 index 0000000..d6b2472 --- /dev/null +++ b/patches/omp-fix-auth.patch @@ -0,0 +1,293 @@ +commit 31d537735d3c5e25a2b620d63ebbfea4cf84f57e +Author: Simon Gardling +Date: Wed Apr 8 13:05:09 2026 -0400 + + Fix key reauth with parallel sessions + +diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md +index 3f50b7bd4..248dacbff 100644 +--- a/packages/ai/CHANGELOG.md ++++ b/packages/ai/CHANGELOG.md +@@ -5,6 +5,10 @@ + + - Removed `coerceNullStrings` function and its automatic null-string coercion behavior from JSON parsing + ++### Fixed ++ ++- Fixed concurrent OAuth refresh token rotation race: when multiple instances share the same credential DB, one instance refreshing a token no longer causes other instances to permanently disable the credential on `invalid_grant` ([#607](https://github.com/can1357/oh-my-pi/issues/607)) ++ + ## [13.19.0] - 2026-04-05 + + ### Fixed +diff --git a/packages/ai/src/auth-storage.ts b/packages/ai/src/auth-storage.ts +index 9fdb4473b..fac936dad 100644 +--- a/packages/ai/src/auth-storage.ts ++++ b/packages/ai/src/auth-storage.ts +@@ -1865,7 +1865,30 @@ export class AuthStorage { + }); + + if (isDefinitiveFailure) { +- // Permanently disable invalid credentials with an explicit cause for inspection/debugging ++ // Before permanently disabling, check if another instance already refreshed ++ // the token in the shared DB. Concurrent instances hold stale in-memory ++ // copies; refresh token rotation by one instance invalidates the token ++ // that other instances still hold. Re-read from DB before giving up. ++ if (/invalid_grant/i.test(errorMsg)) { ++ const entries = this.#getStoredCredentials(provider); ++ const entry = entries[selection.index]; ++ if (entry) { ++ const dbCredentials = this.#store.listAuthCredentials(provider); ++ const dbEntry = dbCredentials.find(row => row.id === entry.id); ++ if ( ++ dbEntry?.credential.type === "oauth" && ++ dbEntry.credential.refresh !== selection.credential.refresh ++ ) { ++ // DB has a newer refresh token — another instance refreshed. ++ // Update in-memory state and retry with the fresh credential. ++ const updated = [...entries]; ++ updated[selection.index] = { id: entry.id, credential: dbEntry.credential }; ++ this.#setStoredCredentials(provider, updated); ++ return this.getApiKey(provider, sessionId, options); ++ } ++ } ++ } ++ // Genuinely invalid — disable the credential + this.#disableCredentialAt(provider, selection.index, `oauth refresh failed: ${errorMsg}`); + if (this.#getCredentialsForProvider(provider).some(credential => credential.type === "oauth")) { + return this.getApiKey(provider, sessionId, options); +diff --git a/packages/ai/test/auth-storage-refresh-race.test.ts b/packages/ai/test/auth-storage-refresh-race.test.ts +new file mode 100644 +index 000000000..5a8098a18 +--- /dev/null ++++ b/packages/ai/test/auth-storage-refresh-race.test.ts +@@ -0,0 +1,230 @@ ++import { Database } from "bun:sqlite"; ++import { afterEach, beforeEach, describe, expect, test, vi } from "bun:test"; ++import * as fs from "node:fs/promises"; ++import * as os from "node:os"; ++import * as path from "node:path"; ++import { AuthCredentialStore, AuthStorage, type OAuthCredential } from "../src/auth-storage"; ++import * as oauthUtils from "../src/utils/oauth"; ++import type { OAuthCredentials } from "../src/utils/oauth/types"; ++ ++/** ++ * Tests for the concurrent OAuth refresh token rotation race condition. ++ * ++ * When multiple omp instances share the same SQLite credential DB but hold ++ * independent in-memory caches, refresh token rotation by one instance ++ * invalidates the token that other instances still hold. Without the fix, ++ * the stale instance permanently disables the credential on `invalid_grant` ++ * even though a valid refresh token exists in the DB. ++ */ ++ ++function readDisabledCauses(dbPath: string, provider: string): string[] { ++ const db = new Database(dbPath, { readonly: true }); ++ try { ++ const rows = db ++ .prepare( ++ "SELECT disabled_cause FROM auth_credentials WHERE provider = ? AND disabled_cause IS NOT NULL ORDER BY id ASC", ++ ) ++ .all(provider) as Array<{ disabled_cause?: string | null }>; ++ return rows.flatMap(row => (typeof row.disabled_cause === "string" ? [row.disabled_cause] : [])); ++ } finally { ++ db.close(); ++ } ++} ++ ++function countActiveCredentials(dbPath: string, provider: string): number { ++ const db = new Database(dbPath, { readonly: true }); ++ try { ++ const row = db ++ .prepare("SELECT COUNT(*) AS count FROM auth_credentials WHERE provider = ? AND disabled_cause IS NULL") ++ .get(provider) as { count?: number } | undefined; ++ return row?.count ?? 0; ++ } finally { ++ db.close(); ++ } ++} ++ ++const EXPIRED_TOKEN_EXPIRES = Date.now() - 60_000; ++const FRESH_TOKEN_EXPIRES = Date.now() + 3_600_000; ++ ++function makeOAuthCredential(suffix: string, opts?: { expires?: number }): OAuthCredential { ++ return { ++ type: "oauth", ++ access: `access-${suffix}`, ++ refresh: `refresh-${suffix}`, ++ expires: opts?.expires ?? FRESH_TOKEN_EXPIRES, ++ accountId: "acct-shared", ++ email: "user@example.com", ++ }; ++} ++ ++describe("AuthStorage concurrent OAuth refresh token rotation race", () => { ++ let tempDir = ""; ++ let dbPath = ""; ++ let storeA: AuthCredentialStore | null = null; ++ let storeB: AuthCredentialStore | null = null; ++ let authStorageA: AuthStorage | null = null; ++ let authStorageB: AuthStorage | null = null; ++ ++ beforeEach(async () => { ++ tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "pi-ai-auth-refresh-race-")); ++ dbPath = path.join(tempDir, "agent.db"); ++ ++ // Both stores point at the same SQLite DB — simulates two omp instances ++ storeA = await AuthCredentialStore.open(dbPath); ++ storeB = await AuthCredentialStore.open(dbPath); ++ }); ++ ++ afterEach(async () => { ++ vi.restoreAllMocks(); ++ storeA?.close(); ++ storeB?.close(); ++ storeA = null; ++ storeB = null; ++ authStorageA = null; ++ authStorageB = null; ++ if (tempDir) { ++ await fs.rm(tempDir, { recursive: true, force: true }); ++ tempDir = ""; ++ } ++ }); ++ ++ test("instance B recovers from invalid_grant when instance A already refreshed", async () => { ++ if (!storeA || !storeB) throw new Error("test setup failed"); ++ ++ // Store an expired OAuth credential — both instances will need to refresh ++ const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES }); ++ authStorageA = new AuthStorage(storeA); ++ await authStorageA.set("anthropic", [staleCredential]); ++ ++ // Instance B loads same credential from DB into its own in-memory cache ++ authStorageB = new AuthStorage(storeB); ++ await authStorageB.reload(); ++ ++ // The refreshed credential that instance A will produce ++ const refreshedCredential: OAuthCredentials = { ++ access: "access-v2", ++ refresh: "refresh-v2", ++ expires: FRESH_TOKEN_EXPIRES, ++ accountId: "acct-shared", ++ email: "user@example.com", ++ }; ++ ++ // Mock both refresh paths: ++ // - refreshOAuthToken: called by pre-refresh in #resolveOAuthApiKey for expired tokens ++ // - getOAuthApiKey: called by #tryOAuthCredential for the actual token exchange ++ const refreshSpy = vi.spyOn(oauthUtils, "refreshOAuthToken"); ++ const getApiKeySpy = vi.spyOn(oauthUtils, "getOAuthApiKey"); ++ ++ // Step 1: Instance A refreshes successfully ++ refreshSpy.mockImplementation(async (_provider, credential) => { ++ return { ...credential, ...refreshedCredential }; ++ }); ++ getApiKeySpy.mockImplementation(async (_provider, credentials) => { ++ const cred = credentials.anthropic as OAuthCredentials | undefined; ++ if (!cred) return null; ++ return { newCredentials: refreshedCredential, apiKey: "sk-ant-new-key" }; ++ }); ++ ++ const keyA = await authStorageA.getApiKey("anthropic", "session-a"); ++ expect(keyA).toBe("sk-ant-new-key"); ++ ++ // DB now has refreshed credential from instance A. ++ // Instance B still has stale refresh-v1 in memory. ++ ++ // Step 2: Instance B tries to refresh with stale token. ++ // The pre-refresh (refreshOAuthToken) runs first, then #tryOAuthCredential calls getOAuthApiKey. ++ // Both throw invalid_grant for stale tokens. After DB re-read, retry with fresh token succeeds. ++ let getApiKeyCallCount = 0; ++ refreshSpy.mockImplementation(async (_provider, credential) => { ++ if (credential.refresh === "refresh-v1") { ++ throw new Error("invalid_grant: Refresh token not found or invalid"); ++ } ++ return { ...credential, ...refreshedCredential }; ++ }); ++ getApiKeySpy.mockImplementation(async (_provider, credentials) => { ++ const cred = credentials.anthropic as OAuthCredentials | undefined; ++ if (!cred) return null; ++ getApiKeyCallCount++; ++ ++ if (cred.refresh === "refresh-v1") { ++ // Stale token — Anthropic would return invalid_grant ++ throw new Error("invalid_grant: Refresh token not found or invalid"); ++ } ++ if (cred.refresh === "refresh-v2") { ++ // Fresh token from instance A — success ++ return { newCredentials: refreshedCredential, apiKey: "sk-ant-new-key-b" }; ++ } ++ return null; ++ }); ++ ++ const keyB = await authStorageB.getApiKey("anthropic", "session-b"); ++ ++ // The credential must NOT be disabled — instance B should have recovered ++ expect(keyB).toBeDefined(); ++ expect(typeof keyB).toBe("string"); ++ ++ // DB should still have exactly 1 active credential, none disabled ++ expect(countActiveCredentials(dbPath, "anthropic")).toBe(1); ++ expect(readDisabledCauses(dbPath, "anthropic")).toEqual([]); ++ // The getOAuthApiKey mock must have been called at least twice: once with stale, once with fresh ++ expect(getApiKeyCallCount).toBeGreaterThanOrEqual(2); ++ }); ++ ++ test("credential is still disabled when DB has same stale token (genuine failure)", async () => { ++ if (!storeA || !storeB) throw new Error("test setup failed"); ++ ++ // Store an expired credential — only one instance, no concurrent refresh ++ const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES }); ++ authStorageA = new AuthStorage(storeA); ++ await authStorageA.set("anthropic", [staleCredential]); ++ ++ // Mock: always fail with invalid_grant (genuinely revoked token) ++ vi.spyOn(oauthUtils, "refreshOAuthToken").mockImplementation(async () => { ++ throw new Error("invalid_grant: Refresh token not found or invalid"); ++ }); ++ vi.spyOn(oauthUtils, "getOAuthApiKey").mockImplementation(async () => { ++ throw new Error("invalid_grant: Refresh token not found or invalid"); ++ }); ++ ++ const key = await authStorageA.getApiKey("anthropic", "session-a"); ++ ++ // Should return undefined — no valid credentials ++ expect(key).toBeUndefined(); ++ ++ // Credential should be disabled in DB ++ const causes = readDisabledCauses(dbPath, "anthropic"); ++ expect(causes.length).toBe(1); ++ expect(causes[0]).toContain("invalid_grant"); ++ }); ++ ++ test("terminates when DB token matches stale token (no concurrent refresh)", async () => { ++ if (!storeA || !storeB) throw new Error("test setup failed"); ++ ++ // Both instances share the same stale credential — nobody refreshed ++ const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES }); ++ authStorageA = new AuthStorage(storeA); ++ await authStorageA.set("anthropic", [staleCredential]); ++ ++ // Instance B loads same stale credential ++ authStorageB = new AuthStorage(storeB); ++ await authStorageB.reload(); ++ ++ // Mock: always fail — the token is genuinely revoked, nobody refreshed ++ vi.spyOn(oauthUtils, "refreshOAuthToken").mockImplementation(async () => { ++ throw new Error("invalid_grant: Refresh token not found or invalid"); ++ }); ++ vi.spyOn(oauthUtils, "getOAuthApiKey").mockImplementation(async () => { ++ throw new Error("invalid_grant: Refresh token not found or invalid"); ++ }); ++ ++ // Instance B fails. DB has the same token (nobody refreshed), so the ++ // fix correctly falls through to disable instead of retrying forever. ++ const keyB = await authStorageB.getApiKey("anthropic", "session-b"); ++ expect(keyB).toBeUndefined(); ++ ++ // Credential should be disabled — the fix did not prevent a genuine failure ++ const causes = readDisabledCauses(dbPath, "anthropic"); ++ expect(causes.length).toBe(1); ++ expect(causes[0]).toContain("invalid_grant"); ++ }); ++});